Example #1
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (datadog.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    from utils.dockerutil import DockerUtil
    hostname = None

    # first, try the config
    if config is None:
        from config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # Try to get GCE instance name
    if hostname is None:
        gce_hostname = GCE.get_hostname(config)
        if gce_hostname is not None:
            if is_valid_hostname(gce_hostname):
                return gce_hostname

    # Try to get the docker hostname
    if hostname is None and DockerUtil.is_dockerized():
        docker_util = DockerUtil(agentConfig=config)
        docker_hostname = docker_util.get_hostname()
        if docker_hostname is not None and is_valid_hostname(docker_hostname):
            hostname = docker_hostname

    # then move on to os-specific detection
    if hostname is None:
        def _get_hostname_unix():
            try:
                # try fqdn
                out, _, rtcode = get_subprocess_output(['/bin/hostname', '-f'], log)
                if rtcode == 0:
                    return out.strip()
            except Exception:
                return None

        os_name = get_os()
        if os_name in ['mac', 'freebsd', 'linux', 'solaris']:
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # if we have an ec2 default hostname, see if there's an instance-id available
    if (Platform.is_ecs_instance()) or (hostname is not None and EC2.is_default(hostname)):
        instanceid = EC2.get_instance_id(config)
        if instanceid:
            hostname = instanceid

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')
        raise Exception('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')
    else:
        return hostname
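A hostname set in the agent configuration wins over every other source. A minimal usage sketch (not part of the original module; it assumes the surrounding agent imports such as is_valid_hostname are available):

# Hypothetical call: a valid, explicitly configured hostname short-circuits detection.
print(get_hostname({'hostname': 'agent01.example.com'}))  # -> 'agent01.example.com'

# With config=None the function loads datadog.conf itself, then falls back to
# GCE metadata, the Docker API, 'hostname -f', the EC2 instance-id and finally
# socket.gethostname().
print(get_hostname())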
Example #2
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (datadog.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    hostname = None

    # first, try the config
    if config is None:
        from config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # Try to get GCE instance name
    if hostname is None:
        gce_hostname = GCE.get_hostname(config)
        if gce_hostname is not None:
            if is_valid_hostname(gce_hostname):
                return gce_hostname

    # Try to get the docker hostname
    docker_util = DockerUtil()
    if hostname is None and docker_util.is_dockerized():
        docker_hostname = docker_util.get_hostname()
        if docker_hostname is not None and is_valid_hostname(docker_hostname):
            return docker_hostname

    # then move on to os-specific detection
    if hostname is None:
        def _get_hostname_unix():
            try:
                # try fqdn
                out, _, rtcode = get_subprocess_output(['/bin/hostname', '-f'], log)
                if rtcode == 0:
                    return out.strip()
            except Exception:
                return None

        os_name = get_os()
        if os_name in ['mac', 'freebsd', 'linux', 'solaris']:
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # if the host is an ECS worker, or has an EC2 hostname
    # or it's a windows machine and the EC2 config service folder exists
    # try and find an EC2 instance ID
    if (Platform.is_ecs_instance()) or \
       (hostname is not None and True in [hostname.lower().startswith(p) for p in [u'ip-', u'domu']]) or \
       (os_name == 'windows' and os.path.exists('C:\\Program Files\\Amazon\\Ec2ConfigService')):
        instanceid = EC2.get_instance_id(config)
        if instanceid:
            hostname = instanceid

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')
        raise Exception('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')
    else:
        return hostname
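The EC2 branch in this variant tests for the default hostname prefixes inline. The same check can be expressed as a small helper (a sketch for readability only, not part of the agent code):

def _looks_like_ec2_default(hostname):
    # Default EC2 hostnames start with 'ip-' (e.g. ip-10-0-0-12) or, on older
    # Xen-based instances, with 'domu'.
    return any(hostname.lower().startswith(p) for p in (u'ip-', u'domu'))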
Example #3
class KubeUtil():
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_KUBELET_PORT = 10255
    DEFAULT_MASTER_PORT = 8080

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self):
        self.docker_util = DockerUtil()
        try:
            config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
            check_config = check_yaml(config_file_path)
            instance = check_config['instances'][0]
        # kubernetes.yaml was not found
        except IOError as ex:
            log.error(ex.message)
            instance = {}
        except Exception:
            log.error('Kubernetes configuration file is invalid. '
                      'Trying connecting to kubelet with default settings anyway...')
            instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self.host = instance.get("host") or self.docker_util.get_hostname()

        self.cadvisor_port = instance.get('port', KubeUtil.DEFAULT_CADVISOR_PORT)
        self.kubelet_port = instance.get('kubelet_port', KubeUtil.DEFAULT_KUBELET_PORT)

        self.metrics_url = urljoin(
            '%s://%s:%d' % (self.method, self.host, self.cadvisor_port), KubeUtil.METRICS_PATH)
        self.pods_list_url = urljoin(
            '%s://%s:%d' % (self.method, self.host, self.kubelet_port), KubeUtil.PODS_LIST_PATH)

        self.kube_health_url = '%s://%s:%d/healthz' % (self.method, self.host, self.kubelet_port)

    def get_kube_labels(self, excluded_keys=None):
        pods = retrieve_json(self.pods_list_url)
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels")
            if name and labels and namespace:
                key = "%s/%s" % (namespace, name)

                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue

                    kube_labels[key].append(u"kube_%s:%s" % (k, v))

        return kube_labels

    def retrieve_pods_list(self):
        return retrieve_json(self.pods_list_url)
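extract_kube_labels turns each pod's metadata labels into kube_<key>:<value> tags keyed by namespace/pod_name. A sketch of the expected input and output (the payload below is illustrative, not a real kubelet response, and kube is assumed to be a KubeUtil instance from a configured agent):

pods = {
    'items': [
        {'metadata': {'name': 'web-1',
                      'namespace': 'default',
                      'labels': {'app': 'web', 'tier': 'frontend'}}},
    ]
}
kube.extract_kube_labels(pods, excluded_keys=['tier'])
# -> defaultdict with {u'default/web-1': [u'kube_app:web']}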
Example #4
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_KUBELET_PORT = 10255
    DEFAULT_MASTER_PORT = 8080
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    CA_CRT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self, instance=None):
        self.docker_util = DockerUtil()
        if instance is None:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                instance = check_config['instances'][0]
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                instance = {}
            except Exception:
                log.error('Kubernetes configuration file is invalid. '
                          'Trying connecting to kubelet with default settings anyway...')
                instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self.host = instance.get("host") or self.docker_util.get_hostname()
        self.kubelet_host = os.environ.get('KUBERNETES_KUBELET_HOST') or self.host
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')

        self.cadvisor_port = instance.get('port', KubeUtil.DEFAULT_CADVISOR_PORT)
        self.kubelet_port = instance.get('kubelet_port', KubeUtil.DEFAULT_KUBELET_PORT)

        self.kubelet_api_url = '%s://%s:%d' % (self.method, self.kubelet_host, self.kubelet_port)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host, self.cadvisor_port)
        self.kubernetes_api_url = 'https://%s/api/v1' % (os.environ.get('KUBERNETES_SERVICE_HOST') or self.DEFAULT_MASTER_NAME)
        self.tls_settings = self._init_tls_settings(instance)

        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url, KubeUtil.MACHINE_INFO_PATH)
        self.pods_list_url = urljoin(self.kubelet_api_url, KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url, 'healthz')

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        token = self.get_auth_token()
        if token:
            tls_settings['bearer_token'] = token

        return tls_settings

    def get_kube_labels(self, excluded_keys=None):
        pods = self.retrieve_pods_list()
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels")
            if name and labels and namespace:
                key = "%s/%s" % (namespace, name)

                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue

                    kube_labels[key].append(u"kube_%s:%s" % (k, v))

        return kube_labels

    def extract_meta(self, pods_list, field_name):
        """
        Extract fields like `uid` or `name` from the `metadata` section of a
        list of pods coming from the kubelet API.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        uids = []
        pods = pods_list.get("items") or []
        for p in pods:
            value = p.get('metadata', {}).get(field_name)
            if value is not None:
                uids.append(value)
        return uids

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return retrieve_json(self.pods_list_url)

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def filter_pods_list(self, pods_list, host_ip):
        """
        Filter out (in place) pods that are not running on the given host.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        pod_items = pods_list.get('items') or []
        log.debug('Found {} pods to filter'.format(len(pod_items)))

        filtered_pods = []
        for pod in pod_items:
            status = pod.get('status', {})
            if status.get('hostIP') == host_ip:
                filtered_pods.append(pod)
        log.debug('Pods after filtering: {}'.format(len(filtered_pods)))

        pods_list['items'] = filtered_pods
        return pods_list

    def retrieve_json_auth(self, url, timeout=10):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(self.CA_CRT_PATH) else False
        log.debug('ssl validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get('bearer_token') if not cert else None
        headers = {'Authorization': 'Bearer {}'.format(bearer_token)} if bearer_token else None

        r = requests.get(url, timeout=timeout, headers=headers, verify=verify, cert=cert)
        r.raise_for_status()
        return r.json()

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def _fetch_host_data(self):
        """
        Retrieve host name and IP address from the payload returned by the listing
        pods endpoints from kubelet or kubernetes API.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning("Unable to retrieve pod list %s. Not fetching host data", str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.host_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' % event['involvedObject'].get('kind', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls):
        """
        Return a string containing the authorization token for the pod.
        """
        try:
            with open(cls.AUTH_TOKEN_PATH) as f:
                return f.read()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(cls.AUTH_TOKEN_PATH, e))

        return None
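retrieve_json_auth prefers a client cert/key pair when one was configured and falls back to the service-account bearer token otherwise; server verification uses the configured CA cert or, failing that, the default service-account CA if it exists. A hedged usage sketch (assumes the agent runs inside a pod and kube is an initialized KubeUtil):

# List the cluster nodes through the apiserver with whatever credentials
# _init_tls_settings() resolved; retrieve_json_auth already returns parsed JSON here.
nodes = kube.retrieve_json_auth(kube.kubernetes_api_url + '/nodes')
print(len(nodes.get('items', [])))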
Example #5
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (datadog.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    hostname = None

    # first, try the config
    if config is None:
        from config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # Try to get GCE instance name
    gce_hostname = GCE.get_hostname(config)
    if gce_hostname is not None:
        if is_valid_hostname(gce_hostname):
            return gce_hostname

    # Try to get the docker hostname
    if Platform.is_containerized():

        # First we try from the Docker API
        docker_util = DockerUtil()
        docker_hostname = docker_util.get_hostname(use_default_gw=False)
        if docker_hostname is not None and is_valid_hostname(docker_hostname):
            hostname = docker_hostname

        elif Platform.is_k8s(): # Let's try from the kubelet
            kube_util = KubeUtil()
            _, kube_hostname = kube_util.get_node_info()
            if kube_hostname is not None and is_valid_hostname(kube_hostname):
                hostname = kube_hostname

    # then move on to os-specific detection
    if hostname is None:
        if Platform.is_unix() or Platform.is_solaris():
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # if we have an ec2 default hostname, see if there's an instance-id available
    if (Platform.is_ecs_instance()) or (hostname is not None and EC2.is_default(hostname)):
        instanceid = EC2.get_instance_id(config)
        if instanceid:
            hostname = instanceid

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')
        raise Exception('Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file')

    return hostname
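Unlike Examples #1 and #2, this variant calls a module-level _get_hostname_unix() that is not shown in the snippet; presumably it mirrors the nested helper defined above, reproduced here as a sketch for completeness:

def _get_hostname_unix():
    try:
        # try fqdn
        out, _, rtcode = get_subprocess_output(['/bin/hostname', '-f'], log)
        if rtcode == 0:
            return out.strip()
    except Exception:
        return None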
Example #6
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (datadog.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    hostname = None

    # first, try the config
    if config is None:
        from config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # Try to get GCE instance name
    gce_hostname = GCE.get_hostname(config)
    if gce_hostname is not None:
        if is_valid_hostname(gce_hostname):
            return gce_hostname

    # Try to get the docker hostname
    if Platform.is_containerized():

        # First we try from the Docker API
        docker_util = DockerUtil()
        docker_hostname = docker_util.get_hostname(use_default_gw=False)
        if docker_hostname is not None and is_valid_hostname(docker_hostname):
            hostname = docker_hostname

        elif Platform.is_k8s():  # Let's try from the kubelet
            try:
                kube_util = KubeUtil()
            except Exception as ex:
                log.error("Couldn't instantiate the kubernetes client, "
                          "getting the k8s hostname won't work. Error: %s" %
                          str(ex))
            else:
                _, kube_hostname = kube_util.get_node_info()
                if kube_hostname is not None and is_valid_hostname(
                        kube_hostname):
                    hostname = kube_hostname

    # then move on to os-specific detection
    if hostname is None:
        if Platform.is_unix() or Platform.is_solaris():
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # if we don't have a hostname, or we have an ec2 default hostname,
    # see if there's an instance-id available
    if not Platform.is_windows() and (hostname is None
                                      or Platform.is_ecs_instance()
                                      or EC2.is_default(hostname)):
        instanceid = EC2.get_instance_id(config)
        if instanceid:
            hostname = instanceid

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical(
            'Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file'
        )
        raise Exception(
            'Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file'
        )

    return hostname
Example #7
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    KUBELET_HEALTH_PATH = '/healthz'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_HTTP_KUBELET_PORT = 10255
    DEFAULT_HTTPS_KUBELET_PORT = 10250
    DEFAULT_MASTER_PORT = 443
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    DEFAULT_LABEL_PREFIX = 'kube_'
    DEFAULT_COLLECT_SERVICE_TAG = True
    CA_CRT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"
    CONTAINER_NAME_LABEL = "io.kubernetes.container.name"

    def __init__(self, **kwargs):
        self.docker_util = DockerUtil()
        if 'init_config' in kwargs and 'instance' in kwargs:
            init_config = kwargs.get('init_config', {})
            instance = kwargs.get('instance', {})
        else:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                init_config = check_config['init_config'] or {}
                instance = check_config['instances'][0] or {}
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                init_config, instance = {}, {}
            except Exception:
                log.error(
                    'Kubernetes configuration file is invalid. '
                    'Trying connecting to kubelet with default settings anyway...'
                )
                init_config, instance = {}, {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')
        self.pod_name = os.environ.get('KUBERNETES_POD_NAME') or self.host_name
        self.tls_settings = self._init_tls_settings(instance)

        # apiserver
        if 'api_server_url' in instance:
            self.kubernetes_api_root_url = instance.get('api_server_url')
        else:
            master_host = os.environ.get(
                'KUBERNETES_SERVICE_HOST') or self.DEFAULT_MASTER_NAME
            master_port = os.environ.get(
                'KUBERNETES_SERVICE_PORT') or self.DEFAULT_MASTER_PORT
            self.kubernetes_api_root_url = 'https://%s:%s' % (master_host,
                                                              master_port)

        self.kubernetes_api_url = '%s/api/v1' % self.kubernetes_api_root_url

        # Service mapping helper class
        self._service_mapper = PodServiceMapper(self)
        from config import _is_affirmative
        self.collect_service_tag = _is_affirmative(
            instance.get('collect_service_tags',
                         KubeUtil.DEFAULT_COLLECT_SERVICE_TAG))

        # leader status triggers event collection
        self.is_leader = False
        self.leader_elector = None
        self.leader_lease_duration = instance.get('leader_lease_duration')

        # kubelet
        # If kubelet_api_url is None, init_kubelet didn't succeed yet.
        self.init_success = False
        self.kubelet_api_url = None
        self.init_retry_interval = init_config.get('init_retry_interval',
                                                   DEFAULT_RETRY_INTERVAL)
        self.last_init_retry = None
        self.left_init_retries = init_config.get('init_retries',
                                                 DEFAULT_INIT_RETRIES) + 1
        self.init_kubelet(instance)

        self.kube_label_prefix = instance.get('label_to_tag_prefix',
                                              KubeUtil.DEFAULT_LABEL_PREFIX)
        self.kube_node_labels = instance.get('node_labels_to_host_tags', {})

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        # apiserver
        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(
                client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        # kubelet
        kubelet_client_crt = instance.get('kubelet_client_crt')
        kubelet_client_key = instance.get('kubelet_client_key')
        if kubelet_client_crt and kubelet_client_key and os.path.exists(
                kubelet_client_crt) and os.path.exists(kubelet_client_key):
            tls_settings['kubelet_client_cert'] = (kubelet_client_crt,
                                                   kubelet_client_key)

        cert = instance.get('kubelet_cert')
        if cert:
            tls_settings['kubelet_verify'] = cert
        else:
            tls_settings['kubelet_verify'] = instance.get(
                'kubelet_tls_verify', DEFAULT_TLS_VERIFY)

        if ('apiserver_client_cert'
                not in tls_settings) or ('kubelet_client_cert'
                                         not in tls_settings):
            # Only lookup token if we don't have client certs for both
            token = self.get_auth_token(instance)
            if token:
                tls_settings['bearer_token'] = token

        return tls_settings

    def init_kubelet(self, instance):
        """
        Handles the retry logic around _locate_kubelet.
        Once _locate_kubelet succeeds, initialize all kubelet-related
        URLs and settings.
        """
        if self.left_init_retries == 0:
            raise Exception(
                "Kubernetes client initialization failed permanently. "
                "Kubernetes-related features will fail.")

        now = time.time()

        # last retry was less than retry_interval ago
        if self.last_init_retry and now <= self.last_init_retry + self.init_retry_interval:
            return
        # else it's the first try, or last retry was long enough ago
        self.last_init_retry = now
        self.left_init_retries -= 1

        try:
            self.kubelet_api_url = self._locate_kubelet(instance)
        except Exception as ex:
            log.error(
                "Failed to initialize kubelet connection. Will retry %s time(s). Error: %s"
                % (self.left_init_retries, str(ex)))
            return
        if not self.kubelet_api_url:
            log.error(
                "Failed to initialize kubelet connection. Will retry %s time(s)."
                % self.left_init_retries)
            return

        self.init_success = True

        self.kubelet_host = self.kubelet_api_url.split(':')[1].lstrip('/')
        self.pods_list_url = urljoin(self.kubelet_api_url,
                                     KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url,
                                       KubeUtil.KUBELET_HEALTH_PATH)

        # namespace of the agent pod
        try:
            self.self_namespace = self.get_self_namespace()
        except Exception:
            log.warning(
                "Failed to get the agent pod namespace, defaulting to default."
            )
            self.self_namespace = DEFAULT_NAMESPACE

        # cadvisor
        self.cadvisor_port = instance.get('port',
                                          KubeUtil.DEFAULT_CADVISOR_PORT)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host,
                                            self.cadvisor_port)
        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url,
                                        KubeUtil.MACHINE_INFO_PATH)

    def _locate_kubelet(self, instance):
        """
        Kubelet may or may not accept un-authenticated http requests.
        If it doesn't we need to use its HTTPS API that may or may not
        require auth.
        Returns the kubelet URL or raises.
        """
        host = os.environ.get('KUBERNETES_KUBELET_HOST') or instance.get(
            "host")
        if not host:
            # if no hostname was provided, use the docker hostname if cert
            # validation is not required, the kubernetes hostname otherwise.
            docker_hostname = self.docker_util.get_hostname(
                should_resolve=True)
            if self.tls_settings.get('kubelet_verify'):
                try:
                    k8s_hostname = self.get_node_hostname(docker_hostname)
                    host = k8s_hostname or docker_hostname
                except Exception as ex:
                    log.error(str(ex))
                    host = docker_hostname
            else:
                host = docker_hostname

        # check if the no-auth endpoint is enabled
        port = instance.get('kubelet_port', KubeUtil.DEFAULT_HTTP_KUBELET_PORT)
        no_auth_url = 'http://%s:%s' % (host, port)
        test_url = urljoin(no_auth_url, KubeUtil.KUBELET_HEALTH_PATH)
        try:
            self.perform_kubelet_query(test_url)
            return no_auth_url
        except Exception:
            log.debug(
                "Couldn't query kubelet over HTTP, assuming it's not in no_auth mode."
            )

        port = instance.get('kubelet_port',
                            KubeUtil.DEFAULT_HTTPS_KUBELET_PORT)
        https_url = 'https://%s:%s' % (host, port)
        test_url = urljoin(https_url, KubeUtil.KUBELET_HEALTH_PATH)
        try:
            self.perform_kubelet_query(test_url)
            return https_url
        except Exception as ex:
            log.warning(
                "Couldn't query kubelet over HTTP, assuming it's not in no_auth mode."
            )
            raise ex

    def get_self_namespace(self):
        pods = self.retrieve_pods_list()
        for pod in pods.get('items', []):
            if pod.get('metadata', {}).get('name') == self.pod_name:
                return pod['metadata']['namespace']
        log.warning(
            "Couldn't find the agent pod and namespace, using the default.")
        return DEFAULT_NAMESPACE

    def get_node_hostname(self, host):
        """
        Query the API server for the kubernetes hostname of the node
        using the docker hostname as a filter.
        """
        node_filter = {'labelSelector': 'kubernetes.io/hostname=%s' % host}
        node = self.retrieve_json_auth(self.kubernetes_api_url + '/nodes?%s' %
                                       urlencode(node_filter)).json()
        if len(node['items']) != 1:
            log.error(
                'Error while getting node hostname: expected 1 node, got %s.' %
                len(node['items']))
        else:
            addresses = (node or {}).get('items',
                                         [{}])[0].get('status',
                                                      {}).get('addresses', [])
            for address in addresses:
                if address.get('type') == 'Hostname':
                    return address['address']
        return None

    def get_kube_pod_tags(self, excluded_keys=None):
        """
        Gets pods' labels as tags + creator and service tags.
        Returns a dict{namespace/podname: [tags]}
        """
        if not self.init_success:
            log.warning(
                "Kubernetes client is not initialized, can't get pod tags.")
            return {}
        pods = self.retrieve_pods_list()
        return self.extract_kube_pod_tags(pods, excluded_keys=excluded_keys)

    def extract_kube_pod_tags(self,
                              pods_list,
                              excluded_keys=None,
                              label_prefix=None):
        """
        Extract labels + creator and service tags from a list of
        pods coming from the kubelet API.

        :param excluded_keys: labels to skip
        :param label_prefix: prefix for label->tag conversion, None defaults
        to the configuration option label_to_tag_prefix
        Returns a dict{namespace/podname: [tags]}
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        label_prefix = label_prefix or self.kube_label_prefix
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels", {})
            if name and namespace:
                key = "%s/%s" % (namespace, name)

                # Extract creator tags
                podtags = self.get_pod_creator_tags(metadata)

                # Extract services tags
                if self.collect_service_tag:
                    for service in self.match_services_for_pod(metadata):
                        if service is not None:
                            podtags.append(u'kube_service:%s' % service)

                # Extract labels
                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue
                    podtags.append(u"%s%s:%s" % (label_prefix, k, v))

                kube_labels[key] = podtags

        return kube_labels

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return self.perform_kubelet_query(self.pods_list_url).json()

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def get_deployment_for_replicaset(self, rs_name):
        """
        Get the deployment name for a given replicaset name
        For now, the rs name's first part always is the deployment's name, see
        https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/controller/deployment/sync.go#L299
        But it might change in a future k8s version. The other way to match RS and deployments is
        to parse and cache /apis/extensions/v1beta1/replicasets, mirroring PodServiceMapper
        In 1.8, the hash generation logic changed: https://github.com/kubernetes/kubernetes/pull/51538/files

        As we are matching both patterns without checking the apiserver version, we might have
        some false positives. For agent6, we plan on doing this pod->replicaset->deployment matching
        in the cluster agent, with replicaset data from the apiserver. This will address that risk.
        """
        end = rs_name.rfind("-")
        if end > 0 and rs_name[end + 1:].isdigit():
            # k8s before 1.8
            return rs_name[0:end]
        if end > 0 and len(rs_name[end + 1:]) == 10:
            # k8s 1.8+ maybe? Check contents
            for char in rs_name[end + 1:]:
                if char not in ALLOWED_ENCODESTRING_ALPHANUMS:
                    return None
            return rs_name[0:end]
        else:
            return None

    def perform_kubelet_query(self, url, verbose=True, timeout=10):
        """
        Perform and return a GET request against kubelet. Support auth and TLS validation.
        """
        tls_context = self.tls_settings

        headers = None
        cert = tls_context.get('kubelet_client_cert')
        verify = tls_context.get('kubelet_verify', DEFAULT_TLS_VERIFY)

        # if cert-based auth is enabled, don't use the token.
        if not cert and url.lower().startswith(
                'https') and 'bearer_token' in self.tls_settings:
            headers = {
                'Authorization':
                'Bearer {}'.format(self.tls_settings.get('bearer_token'))
            }

        return requests.get(url,
                            timeout=timeout,
                            verify=verify,
                            cert=cert,
                            headers=headers,
                            params={'verbose': verbose})

    def get_apiserver_auth_settings(self):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(
                self.CA_CRT_PATH) else False
        log.debug('tls validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get(
            'bearer_token') if not cert else None
        headers = {
            'Authorization': 'Bearer {}'.format(bearer_token)
        } if bearer_token else {}
        headers['content-type'] = 'application/json'
        return cert, headers, verify

    def retrieve_json_auth(self, url, params=None, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.get(url,
                           timeout=timeout,
                           headers=headers,
                           verify=verify,
                           cert=cert,
                           params=params)
        res.raise_for_status()
        return res

    def post_json_to_apiserver(self, url, data, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.post(url,
                            timeout=timeout,
                            headers=headers,
                            verify=verify,
                            cert=cert,
                            data=json.dumps(data))
        res.raise_for_status()
        return res

    def put_json_to_apiserver(self, url, data, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.put(url,
                           timeout=timeout,
                           headers=headers,
                           verify=verify,
                           cert=cert,
                           data=json.dumps(data))
        res.raise_for_status()
        return res

    def delete_to_apiserver(self, url, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.delete(url,
                              timeout=timeout,
                              headers=headers,
                              verify=verify,
                              cert=cert)
        res.raise_for_status()
        return res

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def get_node_metadata(self):
        """Returns host metadata about the local k8s node"""
        meta = {}

        # API server version
        try:
            request_url = "%s/version" % self.kubernetes_api_root_url
            master_info = self.retrieve_json_auth(request_url).json()
            version = master_info.get("gitVersion")
            meta['kube_master_version'] = version[1:]
        except Exception as ex:
            # Intentional use of non-safe lookups to get the exception in the debug logs
            # if the parsing were to fail
            log.debug("Error getting Kube master version: %s" % str(ex))

        # Kubelet version & labels
        if not self.init_success:
            log.warning(
                "Kubelet client failed to initialize, kubelet host tags will be missing for now."
            )
            return meta
        try:
            _, node_name = self.get_node_info()
            if not node_name:
                raise ValueError("node name missing or empty")
            request_url = "%s/nodes/%s" % (self.kubernetes_api_url, node_name)
            node_info = self.retrieve_json_auth(request_url).json()
            version = node_info.get("status").get("nodeInfo").get(
                "kubeletVersion")
            meta['kubelet_version'] = version[1:]
        except Exception as ex:
            log.debug("Error getting Kubelet version: %s" % str(ex))

        return meta

    def get_node_hosttags(self):
        """
        Returns node labels as tags. Tag name is transformed as defined
        in node_labels_to_host_tags in the kubernetes check configuration.
        Note: queries the API server for node info. Configure RBAC accordingly.
        """
        tags = []

        try:
            _, node_name = self.get_node_info()
            if not node_name:
                raise ValueError("node name missing or empty")

            request_url = "%s/nodes/%s" % (self.kubernetes_api_url, node_name)
            node_info = self.retrieve_json_auth(request_url).json()
            node_labels = node_info.get('metadata', {}).get('labels', {})

            for l_name, t_name in self.kube_node_labels.iteritems():
                if l_name in node_labels:
                    tags.append('%s:%s' % (t_name, node_labels[l_name]))

        except Exception as ex:
            log.debug("Error getting node labels: %s" % str(ex))

        return tags

    def _fetch_host_data(self):
        """
        Retrieve host name and IP address from the payload returned by the listing
        pods endpoints from kubelet.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning(
                "Unable to retrieve pod list %s. Not fetching host data",
                str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.pod_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' %
                        event['involvedObject'].get('kind', '').lower())
        if 'name' in event.get('involvedObject', {}):
            tags.append('object_name:%s' %
                        event['involvedObject'].get('name', '').lower())
        if 'component' in event.get('source', {}):
            tags.append('source_component:%s' %
                        event['source'].get('component', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls, instance):
        """
        Return a string containing the authorization token for the pod.
        """

        token_path = instance.get('bearer_token_path', cls.AUTH_TOKEN_PATH)
        try:
            with open(token_path) as f:
                return f.read().strip()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(token_path, e))

        return None

    def match_services_for_pod(self, pod_metadata, refresh=False):
        """
        Match the pods labels with services' label selectors to determine the list
        of services that point to that pod. Returns an array of service names.

        Pass refresh=True if you want to bypass the cached cid->services mapping (after a service change)
        """
        s = self._service_mapper.match_services_for_pod(pod_metadata,
                                                        refresh,
                                                        names=True)
        #log.warning("Matches for %s: %s" % (pod_metadata.get('name'), str(s)))
        return s

    def get_event_retriever(self, namespaces=None, kinds=None, delay=None):
        """
        Returns a KubeEventRetriever object ready for action
        """
        return KubeEventRetriever(self, namespaces, kinds, delay)

    def match_containers_for_pods(self, pod_uids, podlist=None):
        """
        Reads a set of pod uids and returns the set of docker
        container ids they manage.
        podlist should be a recent self.retrieve_pods_list return value;
        if not given, that method will be called.
        """
        cids = set()

        if not isinstance(pod_uids, set) or len(pod_uids) < 1:
            return cids

        if podlist is None:
            podlist = self.retrieve_pods_list()

        for pod in podlist.get('items', {}):
            uid = pod.get('metadata', {}).get('uid', None)
            if uid in pod_uids:
                # pending pods may not have containerStatuses yet
                for container in pod.get('status',
                                         {}).get('containerStatuses') or []:
                    id = container.get('containerID', "")
                    if id.startswith("docker://"):
                        cids.add(id[9:])

        return cids

    def get_pod_creator(self, pod_metadata):
        """
        Get the pod's creator from its metadata and returns a
        tuple (creator_kind, creator_name)

        This allows for consistency across code paths
        """
        try:
            created_by = json.loads(
                pod_metadata['annotations']['kubernetes.io/created-by'])
            creator_kind = created_by.get('reference', {}).get('kind')
            creator_name = created_by.get('reference', {}).get('name')
            return (creator_kind, creator_name)
        except Exception:
            log.debug('Could not parse creator for pod ' +
                      pod_metadata.get('name', ''))
            return (None, None)

    def get_pod_creator_tags(self,
                             pod_metadata,
                             legacy_rep_controller_tag=False):
        """
        Get the pod's creator from its metadata and returns a list of tags
        in the form kube_$kind:$name, ready to add to the metrics
        """
        try:
            tags = []
            creator_kind, creator_name = self.get_pod_creator(pod_metadata)
            if creator_kind in CREATOR_KIND_TO_TAG and creator_name:
                tags.append("%s:%s" %
                            (CREATOR_KIND_TO_TAG[creator_kind], creator_name))
                if creator_kind == 'ReplicaSet':
                    deployment = self.get_deployment_for_replicaset(
                        creator_name)
                    if deployment:
                        tags.append(
                            "%s:%s" %
                            (CREATOR_KIND_TO_TAG['Deployment'], deployment))
            if legacy_rep_controller_tag and creator_kind != 'ReplicationController' and creator_name:
                tags.append(
                    'kube_replication_controller:{0}'.format(creator_name))

            return tags
        except Exception:
            log.warning('Could not parse creator tags for pod ' +
                        pod_metadata.get('name'))
            return []

    def process_events(self, event_array, podlist=None):
        """
        Reads a list of kube events, invalidates caches and computes a set
        of containers impacted by the changes, to refresh service discovery
        Pod creation/deletion events are ignored for now, as docker_daemon already
        sends container creation/deletion events to SD

        Pod->containers matching is done using match_containers_for_pods
        """
        try:
            pods = set()
            if self._service_mapper:
                pods.update(self._service_mapper.process_events(event_array))
            return self.match_containers_for_pods(pods, podlist)
        except Exception as e:
            log.warning("Error processing events %s: %s" %
                        (str(event_array), e))
            return set()

    def refresh_leader(self):
        if not self.init_success:
            log.warning(
                "Kubelet client is not initialized, leader election is disabled."
            )
            return
        if not self.leader_elector:
            self.leader_elector = LeaderElector(self)
        self.leader_elector.try_acquire_or_refresh()

    def image_name_resolver(self, image):
        """
        Wraps around the sibling dockerutil method and catches exceptions
        """
        if image is None:
            return None
        try:
            return self.docker_util.image_name_resolver(image)
        except Exception as e:
            log.warning("Error resolving image name: %s", str(e))
            return image
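The heuristic in get_deployment_for_replicaset strips either a purely numeric pod-template hash (pre-1.8) or a 10-character encoded hash (1.8+) from the replicaset name to recover the deployment name. A sketch of the expected behaviour (assumes kube is an initialized KubeUtil and that the hash characters fall within ALLOWED_ENCODESTRING_ALPHANUMS, whose value is not shown in this snippet):

kube.get_deployment_for_replicaset('frontend-3272451632')  # numeric hash (pre-1.8) -> 'frontend'
kube.get_deployment_for_replicaset('frontend-5cbfc4b9d8')  # 10-char hash (1.8+)    -> 'frontend'
kube.get_deployment_for_replicaset('frontend')             # no hash suffix         -> None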
Example #8
class KubeUtil:

    DEFAULT_METHOD = 'http'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    DEPLOYMENTS_LIST_PATH = 'deployments/'
    REPLICASETS_LIST_PATH = 'replicasets/'
    PODS_LIST_PATH = 'pods/'
    SERVICES_LIST_PATH = 'services/'
    NODES_LIST_PATH = 'nodes/'
    ENDPOINTS_LIST_PATH = 'endpoints/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_KUBELET_PORT = 10255
    DEFAULT_MASTER_METHOD = 'https'
    DEFAULT_MASTER_PORT = 443
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    DEFAULT_USE_KUBE_AUTH = False
    CA_CRT_PATH = '/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/run/secrets/kubernetes.io/serviceaccount/token'
    DEFAULT_TIMEOUT_SECONDS = 10

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self, instance=None):
        self.docker_util = DockerUtil()
        if instance is None:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                instance = check_config['instances'][0]
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                instance = {}
            except Exception:
                log.error(
                    'Kubernetes configuration file is invalid. '
                    'Trying connecting to kubelet with default settings anyway...'
                )
                instance = {}

        self.timeoutSeconds = instance.get("timeoutSeconds",
                                           KubeUtil.DEFAULT_TIMEOUT_SECONDS)
        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self.host = instance.get("host") or self.docker_util.get_hostname()
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')

        self.cadvisor_port = instance.get('port',
                                          KubeUtil.DEFAULT_CADVISOR_PORT)
        self.kubelet_port = instance.get('kubelet_port',
                                         KubeUtil.DEFAULT_KUBELET_PORT)
        self.master_method = instance.get('master_method',
                                          KubeUtil.DEFAULT_MASTER_METHOD)
        self.master_name = instance.get('master_name',
                                        KubeUtil.DEFAULT_MASTER_NAME)
        self.master_port = instance.get('master_port',
                                        KubeUtil.DEFAULT_MASTER_PORT)
        self.use_kube_auth = instance.get('use_kube_auth',
                                          KubeUtil.DEFAULT_USE_KUBE_AUTH)

        self.kubelet_api_url = '%s://%s:%d' % (self.method, self.host,
                                               self.kubelet_port)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.host,
                                            self.cadvisor_port)
        self.master_host = os.environ.get('KUBERNETES_SERVICE_HOST') or (
            '%s:%d' % (self.master_name, self.master_port))
        self.kubernetes_api_url = '%s://%s/api/v1/' % (self.master_method,
                                                       self.master_host)
        self.kubernetes_api_extension_url = '%s://%s/apis/extensions/v1beta1/' % (
            self.master_method, self.master_host)

        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url,
                                        KubeUtil.MACHINE_INFO_PATH)
        self.nodes_list_url = urljoin(self.kubernetes_api_url,
                                      KubeUtil.NODES_LIST_PATH)
        self.services_list_url = urljoin(self.kubernetes_api_url,
                                         KubeUtil.SERVICES_LIST_PATH)
        self.endpoints_list_url = urljoin(self.kubernetes_api_url,
                                          KubeUtil.ENDPOINTS_LIST_PATH)
        self.pods_list_url = urljoin(self.kubernetes_api_url,
                                     KubeUtil.PODS_LIST_PATH)
        self.deployments_list_url = urljoin(self.kubernetes_api_extension_url,
                                            KubeUtil.DEPLOYMENTS_LIST_PATH)

        self.kube_health_url = urljoin(self.kubelet_api_url, 'healthz')

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = defaultdict(int)

    def get_kube_labels(self, excluded_keys=None):
        pods = self.retrieve_pods_list()
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            pod_labels = self.extract_metadata_labels(metadata, excluded_keys)
            kube_labels.update(pod_labels)

        return kube_labels

    def extract_metadata_labels(self,
                                metadata,
                                excluded_keys=None,
                                add_kube_prefix=True):
        """
        Extract labels from a metadata section coming from the kubelet API.
        """
        excluded_keys = excluded_keys or ()
        kube_labels = defaultdict(list)
        name = metadata.get("name")
        namespace = metadata.get("namespace")
        labels = metadata.get("labels")
        if name and labels:
            if namespace:
                key = "%s/%s" % (namespace, name)
            else:
                key = name

            for k, v in labels.iteritems():
                if k in excluded_keys:
                    continue
                if add_kube_prefix:
                    kube_labels[key].append(u"kube_%s:%s" % (k, v))
                else:
                    kube_labels[key].append(u"%s:%s" % (k, v))
        return kube_labels

    def extract_meta(self, pods_list, field_name):
        """
        Extract fields like `uid` or `name` from the `metadata` section of a
        list of pods coming from the kubelet API.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        uids = []
        pods = pods_list.get("items") or []
        for p in pods:
            value = p.get('metadata', {}).get(field_name)
            if value is not None:
                uids.append(value)
        return uids

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster by querying the Kubernetes API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return self.retrieve_json_with_optional_auth(url=self.pods_list_url)

    def retrieve_endpoints_list(self):
        """
        Retrieve the list of endpoints for this cluster by querying the Kubernetes API.

        TODO: the list of endpoints could be cached with some policy to be decided.
        """
        return self.retrieve_json_with_optional_auth(
            url=self.endpoints_list_url)

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return self.retrieve_json_with_optional_auth(url=self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return self.retrieve_json_with_optional_auth(url=self.metrics_url)

    def retrieve_nodes_list(self):
        """
        Retrieve the list of nodes for this cluster by querying the Kubernetes API.
        """
        return self.retrieve_json_with_optional_auth(self.nodes_list_url)

    def retrieve_services_list(self):
        """
        Retrieve the list of services for this cluster by querying the Kubernetes API.
        """
        return self.retrieve_json_with_optional_auth(
            url=self.services_list_url)

    def retrieve_json_with_optional_auth(self, url):
        if self.use_kube_auth:
            return self.retrieve_json_auth(url=url,
                                           auth_token=self.get_auth_token(),
                                           timeout=self.timeoutSeconds)
        else:
            return retrieve_json(url=url, timeout=self.timeoutSeconds)

    def retrieve_deployments_list(self):
        """
        Retrieve the list of deployments for this cluster by querying the Kubernetes extensions API.
        https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
        """
        return self.retrieve_json_with_optional_auth(
            url=self.deployments_list_url)

    def retrieve_replicaset_filtered_list(self,
                                          namespace=None,
                                          labels_dict=None):
        """
        Retrieve the list of replicasets matching the given namespace and label selector.
        https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/

        The replicaset filter is very similar to how it is implemented in kubernetes dashboard:
        https://github.com/kubernetes/dashboard/blob/master/src/app/backend/resource/deployment/detail.go
        https://github.com/kubernetes/dashboard/blob/master/src/app/backend/resource/common/resourcechannels.go
        """
        if labels_dict and len(labels_dict) > 0:
            params = "?labelSelector=%s" % self._to_label_selector(labels_dict)
        else:
            params = ""

        if namespace:
            fetch_url = "%snamespaces/%s/%s%s" % (
                self.kubernetes_api_extension_url, namespace,
                KubeUtil.REPLICASETS_LIST_PATH, params)
        else:
            fetch_url = "%s%s%s" % (self.kubernetes_api_extension_url,
                                    KubeUtil.REPLICASETS_LIST_PATH, params)

        return self._retrieve_replicaset_list(fetch_url=fetch_url)

    def _retrieve_replicaset_list(self, fetch_url):
        """
        Retrieve the list of replicasets matching the given namespace and label selector.
        https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
        """
        return self.retrieve_json_with_optional_auth(url=fetch_url)

    def _to_label_selector(self, labels_dict):
        """
        Render a labels dict such as {'app': 'nginxapp', 'pod-template-hash': 275046495} into a label selector of the form "app%3Dnginxapp,pod-template-hash%3D275046495".
        """
        labels = [
            "%s%%3D%s" % (name, value) for name, value in labels_dict.items()
        ]
        return ",".join(labels)

    def filter_pods_list(self, pods_list, host_ip):
        """
        Filter out (in place) pods that are not running on the given host.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        pod_items = pods_list.get('items') or []
        log.debug('Found {} pods to filter'.format(len(pod_items)))

        filtered_pods = []
        for pod in pod_items:
            status = pod.get('status', {})
            if status.get('hostIP') == host_ip:
                filtered_pods.append(pod)
        log.debug('Pods after filtering: {}'.format(len(filtered_pods)))

        pods_list['items'] = filtered_pods
        return pods_list

    def retrieve_json_auth(self, url, auth_token, timeout=10):
        """
        Kubernetes API requires authentication using a token available in
        every pod.

        We try to verify the SSL certificate if one is available.
        """
        verify = self.CA_CRT_PATH if os.path.exists(
            self.CA_CRT_PATH) else False
        log.debug('ssl validation: {}'.format(verify))
        headers = {'Authorization': 'Bearer {}'.format(auth_token)}
        r = requests.get(url, timeout=timeout, headers=headers, verify=verify)
        r.raise_for_status()
        return r.json()

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def _fetch_host_data(self):
        """
        Retrieve the host name and IP address from the payload returned by the
        pod-listing endpoint of the kubelet or Kubernetes API.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning(
                "Unable to retrieve pod list %s. Not fetching host data",
                str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.host_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' %
                        event['involvedObject'].get('kind', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls):
        """
        Return a string containing the authorization token for the pod.
        """
        try:
            with open(cls.AUTH_TOKEN_PATH) as f:
                return f.read()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(
                cls.AUTH_TOKEN_PATH, e))

        return None
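
# Usage sketch (not part of the original example): a standalone illustration of
# the label-selector rendering done by KubeUtil._to_label_selector above.
# The sample labels are made up; no cluster access is needed.
def _to_label_selector_demo(labels_dict):
    # mirror the method above: encode '=' as %3D and join the pairs with commas
    return ",".join("%s%%3D%s" % (name, value) for name, value in labels_dict.items())

print(_to_label_selector_demo({'app': 'nginxapp', 'pod-template-hash': 275046495}))
# expected output (pair order may vary): app%3Dnginxapp,pod-template-hash%3D275046495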
Exemple #9
0
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_KUBELET_PORT = 10255
    DEFAULT_MASTER_PORT = 8080
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    CA_CRT_PATH = '/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self, instance=None):
        self.docker_util = DockerUtil()
        if instance is None:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                instance = check_config['instances'][0]
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                instance = {}
            except Exception:
                log.error('Kubernetes configuration file is invalid. '
                          'Trying to connect to kubelet with default settings anyway...')
                instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self.host = instance.get("host") or self.docker_util.get_hostname()
        self.kubelet_host = os.environ.get('KUBERNETES_KUBELET_HOST') or self.host
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')

        self.cadvisor_port = instance.get('port', KubeUtil.DEFAULT_CADVISOR_PORT)
        self.kubelet_port = instance.get('kubelet_port', KubeUtil.DEFAULT_KUBELET_PORT)

        self.kubelet_api_url = '%s://%s:%d' % (self.method, self.kubelet_host, self.kubelet_port)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host, self.cadvisor_port)
        self.kubernetes_api_url = 'https://%s/api/v1' % (os.environ.get('KUBERNETES_SERVICE_HOST') or self.DEFAULT_MASTER_NAME)
        self.tls_settings = self._init_tls_settings(instance)

        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url, KubeUtil.MACHINE_INFO_PATH)
        self.pods_list_url = urljoin(self.kubelet_api_url, KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url, 'healthz')

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        token = self.get_auth_token()
        if token:
            tls_settings['bearer_token'] = token

        return tls_settings

    def get_kube_labels(self, excluded_keys=None):
        pods = self.retrieve_pods_list()
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels")
            if name and labels and namespace:
                key = "%s/%s" % (namespace, name)

                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue

                    kube_labels[key].append(u"kube_%s:%s" % (k, v))

        return kube_labels

    def extract_meta(self, pods_list, field_name):
        """
        Extract fields like `uid` or `name` from the `metadata` section of a
        list of pods coming from the kubelet API.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        uids = []
        pods = pods_list.get("items") or []
        for p in pods:
            value = p.get('metadata', {}).get(field_name)
            if value is not None:
                uids.append(value)
        return uids

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return retrieve_json(self.pods_list_url)

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def filter_pods_list(self, pods_list, host_ip):
        """
        Filter out (in place) pods that are not running on the given host.

        TODO: currently not in use, was added to support events filtering, consider removing it.
        """
        pod_items = pods_list.get('items') or []
        log.debug('Found {} pods to filter'.format(len(pod_items)))

        filtered_pods = []
        for pod in pod_items:
            status = pod.get('status', {})
            if status.get('hostIP') == host_ip:
                filtered_pods.append(pod)
        log.debug('Pods after filtering: {}'.format(len(filtered_pods)))

        pods_list['items'] = filtered_pods
        return pods_list

    def retrieve_json_auth(self, url, timeout=10):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(self.CA_CRT_PATH) else False
        log.debug('ssl validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get('bearer_token') if not cert else None
        headers = {'Authorization': 'Bearer {}'.format(bearer_token)} if bearer_token else None

        r = requests.get(url, timeout=timeout, headers=headers, verify=verify, cert=cert)
        r.raise_for_status()
        return r.json()

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def _fetch_host_data(self):
        """
        Retrieve the host name and IP address from the payload returned by the
        pod-listing endpoint of the kubelet or Kubernetes API.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning("Unable to retrieve pod list %s. Not fetching host data", str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.host_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' % event['involvedObject'].get('kind', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls):
        """
        Return a string containing the authorization token for the pod.
        """
        try:
            with open(cls.AUTH_TOKEN_PATH) as f:
                return f.read()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(cls.AUTH_TOKEN_PATH, e))

        return None
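
# Usage sketch (not part of the original example): a minimal driver for the
# KubeUtil variant above. It assumes it runs inside the Datadog agent environment,
# where the import path `utils.kubernetes` and a reachable kubelet are available.
from utils.kubernetes import KubeUtil

kube_util = KubeUtil()                   # Singleton: repeated instantiations return the same object
pods = kube_util.retrieve_pods_list()    # plain GET against <kubelet>/pods/
labels = kube_util.extract_kube_labels(pods, excluded_keys=['pod-template-hash'])
node_ip, node_name = kube_util.get_node_info()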
Exemple #10
0
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    KUBELET_HEALTH_PATH = '/healthz'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_HTTP_KUBELET_PORT = 10255
    DEFAULT_HTTPS_KUBELET_PORT = 10250
    DEFAULT_MASTER_PORT = 8080
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    CA_CRT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self, instance=None):
        self.docker_util = DockerUtil()
        if instance is None:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                instance = check_config['instances'][0]
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                instance = {}
            except Exception:
                log.error(
                    'Kubernetes configuration file is invalid. '
                    'Trying to connect to kubelet with default settings anyway...'
                )
                instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')
        self.tls_settings = self._init_tls_settings(instance)

        # apiserver
        self.kubernetes_api_url = 'https://%s/api/v1' % (
            os.environ.get('KUBERNETES_SERVICE_HOST')
            or self.DEFAULT_MASTER_NAME)

        # kubelet
        try:
            self.kubelet_api_url = self._locate_kubelet(instance)
            if not self.kubelet_api_url:
                raise Exception(
                    "Couldn't find a method to connect to kubelet.")
        except Exception as ex:
            log.error(
                "Kubernetes check exiting, cannot run without access to kubelet."
            )
            raise

        self.kubelet_host = self.kubelet_api_url.split(':')[1].lstrip('/')
        self.pods_list_url = urljoin(self.kubelet_api_url,
                                     KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url,
                                       KubeUtil.KUBELET_HEALTH_PATH)

        # cadvisor
        self.cadvisor_port = instance.get('port',
                                          KubeUtil.DEFAULT_CADVISOR_PORT)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host,
                                            self.cadvisor_port)
        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url,
                                        KubeUtil.MACHINE_INFO_PATH)

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        # apiserver
        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(
                client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        token = self.get_auth_token()
        if token:
            tls_settings['bearer_token'] = token

        # kubelet
        kubelet_client_crt = instance.get('kubelet_client_crt')
        kubelet_client_key = instance.get('kubelet_client_key')
        if kubelet_client_crt and kubelet_client_key and os.path.exists(
                kubelet_client_crt) and os.path.exists(kubelet_client_key):
            tls_settings['kubelet_client_cert'] = (kubelet_client_crt,
                                                   kubelet_client_key)

        cert = instance.get('kubelet_cert')
        if cert:
            tls_settings['kubelet_verify'] = cert
        else:
            tls_settings['kubelet_verify'] = instance.get(
                'kubelet_tls_verify', DEFAULT_TLS_VERIFY)

        return tls_settings

    def _locate_kubelet(self, instance):
        """
        The kubelet may or may not accept unauthenticated HTTP requests.
        If it doesn't, we need to use its HTTPS API, which may or may not
        require auth.
        """
        host = os.environ.get('KUBERNETES_KUBELET_HOST') or instance.get(
            "host")
        if not host:
            # if no hostname was provided, use the docker hostname if cert
            # validation is not required, the kubernetes hostname otherwise.
            docker_hostname = self.docker_util.get_hostname(
                should_resolve=True)
            if self.tls_settings.get('kubelet_verify'):
                try:
                    k8s_hostname = self.get_node_hostname(docker_hostname)
                    host = k8s_hostname or docker_hostname
                except Exception as ex:
                    log.error(str(ex))
                    host = docker_hostname
            else:
                host = docker_hostname
        try:
            # check if the no-auth endpoint is enabled
            port = instance.get('kubelet_port',
                                KubeUtil.DEFAULT_HTTP_KUBELET_PORT)
            no_auth_url = 'http://%s:%s' % (host, port)
            test_url = urljoin(no_auth_url, KubeUtil.KUBELET_HEALTH_PATH)
            self.perform_kubelet_query(test_url)
            return no_auth_url
        except Exception:
            log.debug(
                "Couldn't query kubelet over HTTP, assuming it's not in no_auth mode."
            )

        port = instance.get('kubelet_port',
                            KubeUtil.DEFAULT_HTTPS_KUBELET_PORT)

        https_url = 'https://%s:%s' % (host, port)
        test_url = urljoin(https_url, KubeUtil.KUBELET_HEALTH_PATH)
        self.perform_kubelet_query(test_url)

        return https_url

    def get_node_hostname(self, host):
        """
        Query the API server for the kubernetes hostname of the node
        using the docker hostname as a filter.
        """
        node_filter = {'labelSelector': 'kubernetes.io/hostname=%s' % host}
        node = self.retrieve_json_auth(self.kubernetes_api_url +
                                       '/nodes?%s' % urlencode(node_filter))
        if len(node['items']) != 1:
            log.error(
                'Error while getting node hostname: expected 1 node, got %s.' %
                len(node['items']))
        else:
            addresses = (node or {}).get('items',
                                         [{}])[0].get('status',
                                                      {}).get('addresses', [])
            for address in addresses:
                if address.get('type') == 'Hostname':
                    return address['address']
        return None

    def get_kube_labels(self, excluded_keys=None):
        pods = self.retrieve_pods_list()
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels")
            if name and labels and namespace:
                key = "%s/%s" % (namespace, name)

                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue

                    kube_labels[key].append(u"kube_%s:%s" % (k, v))

        return kube_labels

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return self.perform_kubelet_query(self.pods_list_url).json()

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def perform_kubelet_query(self, url, verbose=True, timeout=10):
        """
        Perform and return a GET request against the kubelet. Supports auth and TLS validation.
        """
        tls_context = self.tls_settings

        headers = None
        cert = tls_context.get('kubelet_client_cert')
        verify = tls_context.get('kubelet_verify', DEFAULT_TLS_VERIFY)

        # if cert-based auth is enabled, don't use the token.
        if not cert and url.lower().startswith('https'):
            headers = {
                'Authorization': 'Bearer {}'.format(self.get_auth_token())
            }

        return requests.get(url,
                            timeout=timeout,
                            verify=verify,
                            cert=cert,
                            headers=headers,
                            params={'verbose': verbose})

    def retrieve_json_auth(self, url, timeout=10, verify=None):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(
                self.CA_CRT_PATH) else False
        log.debug('tls validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get(
            'bearer_token') if not cert else None
        headers = {
            'Authorization': 'Bearer {}'.format(bearer_token)
        } if bearer_token else None

        r = requests.get(url,
                         timeout=timeout,
                         headers=headers,
                         verify=verify,
                         cert=cert)
        r.raise_for_status()
        return r.json()

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def _fetch_host_data(self):
        """
        Retrieve the host name and IP address from the payload returned by the
        pod-listing endpoint of the kubelet.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning(
                "Unable to retrieve pod list %s. Not fetching host data",
                str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.host_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' %
                        event['involvedObject'].get('kind', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls):
        """
        Return a string containing the authorization token for the pod.
        """
        try:
            with open(cls.AUTH_TOKEN_PATH) as f:
                return f.read()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(
                cls.AUTH_TOKEN_PATH, e))

        return None
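
# Standalone sketch (not part of the original example): the credential precedence
# implemented by retrieve_json_auth above, extracted so it can run without a cluster.
# The tls_settings dict and the CA path below are toy values.
import os


def build_apiserver_request_kwargs(tls_settings,
                                    ca_crt_path='/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'):
    # prefer a user-supplied CA bundle, then the in-pod service-account CA, else no verification
    verify = tls_settings.get('apiserver_cacert')
    if not verify:
        verify = ca_crt_path if os.path.exists(ca_crt_path) else False
    # a client cert/key pair takes precedence; the bearer token is only used without one
    cert = tls_settings.get('apiserver_client_cert')
    bearer_token = tls_settings.get('bearer_token') if not cert else None
    headers = {'Authorization': 'Bearer {}'.format(bearer_token)} if bearer_token else None
    return {'verify': verify, 'cert': cert, 'headers': headers}


print(build_apiserver_request_kwargs({'bearer_token': 'abc123'}))
# outside a pod this prints verify=False, cert=None and an Authorization header carrying the token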
Exemple #11
0
def get_hostname(config=None):
    hostname = None

    if config is None:
        from config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        print("type, {}".format(get_utf8(config_hostname)))
        return get_utf8(config_hostname)

    if hostname is None:
        gce_hostname = GCE.get_hostname(config)
        if gce_hostname is not None:
            if is_valid_hostname(gce_hostname):
                return gce_hostname[:64]

    docker_util = DockerUtil()
    if hostname is None and docker_util.is_dockerized():
        docker_hostname = docker_util.get_hostname()
        if docker_hostname is not None and is_valid_hostname(docker_hostname):
            hostname = docker_hostname[:64]

    if hostname is None:

        def _get_hostname_unix():
            try:
                out, _, rtcode = get_subprocess_output(['/bin/hostname', '-f'],
                                                       log)
                if rtcode == 0:
                    return out.strip()
            except Exception:
                return None

        os_name = get_os()
        if os_name in ['mac', 'freebsd', 'linux', 'solaris']:
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    if (Platform.is_ecs_instance()) or (hostname is not None
                                        and EC2.is_default(hostname)):
        instanceid = EC2.get_instance_id(config)
        if instanceid:
            hostname = instanceid

    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical(
            'Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file'
        )
        raise Exception(
            'Unable to reliably determine host name. You can define one in datadog.conf or in your hosts file'
        )
    else:
        return hostname
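
# Usage sketch (not part of the original example): the fast path of get_hostname
# above. It assumes this runs in the same module, so that is_valid_hostname,
# get_utf8 and the cloud helpers imported there are available; the hostname is made up.
config = {'hostname': 'my-agent-host'}   # any mapping exposing .get() works here
print(get_hostname(config))              # returns the configured name when it is valid;
                                         # otherwise detection falls through GCE/docker/OS/EC2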
Exemple #12
0
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    KUBELET_HEALTH_PATH = '/healthz'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_HTTP_KUBELET_PORT = 10255
    DEFAULT_HTTPS_KUBELET_PORT = 10250
    DEFAULT_MASTER_PORT = 8080
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    DEFAULT_LABEL_PREFIX = 'kube_'
    CA_CRT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self, instance=None):
        self.docker_util = DockerUtil()
        if instance is None:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                instance = check_config['instances'][0]
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                instance = {}
            except Exception:
                log.error(
                    'Kubernetes configuration file is invalid. '
                    'Trying to connect to kubelet with default settings anyway...'
                )
                instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')
        self.tls_settings = self._init_tls_settings(instance)

        # apiserver
        self.kubernetes_api_url = 'https://%s/api/v1' % (
            os.environ.get('KUBERNETES_SERVICE_HOST')
            or self.DEFAULT_MASTER_NAME)

        # kubelet
        try:
            self.kubelet_api_url = self._locate_kubelet(instance)
            if not self.kubelet_api_url:
                raise Exception(
                    "Couldn't find a method to connect to kubelet.")
        except Exception as ex:
            log.error(
                "Kubernetes check exiting, cannot run without access to kubelet."
            )
            raise

        # Service mapping helper class
        self._service_mapper = PodServiceMapper(self)

        self.kubelet_host = self.kubelet_api_url.split(':')[1].lstrip('/')
        self.pods_list_url = urljoin(self.kubelet_api_url,
                                     KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url,
                                       KubeUtil.KUBELET_HEALTH_PATH)
        self.kube_label_prefix = instance.get('label_to_tag_prefix',
                                              KubeUtil.DEFAULT_LABEL_PREFIX)

        # cadvisor
        self.cadvisor_port = instance.get('port',
                                          KubeUtil.DEFAULT_CADVISOR_PORT)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host,
                                            self.cadvisor_port)
        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url,
                                        KubeUtil.MACHINE_INFO_PATH)

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        # apiserver
        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(
                client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        token = self.get_auth_token()
        if token:
            tls_settings['bearer_token'] = token

        # kubelet
        kubelet_client_crt = instance.get('kubelet_client_crt')
        kubelet_client_key = instance.get('kubelet_client_key')
        if kubelet_client_crt and kubelet_client_key and os.path.exists(
                kubelet_client_crt) and os.path.exists(kubelet_client_key):
            tls_settings['kubelet_client_cert'] = (kubelet_client_crt,
                                                   kubelet_client_key)

        cert = instance.get('kubelet_cert')
        if cert:
            tls_settings['kubelet_verify'] = cert
        else:
            tls_settings['kubelet_verify'] = instance.get(
                'kubelet_tls_verify', DEFAULT_TLS_VERIFY)

        return tls_settings

    def _locate_kubelet(self, instance):
        """
        The kubelet may or may not accept unauthenticated HTTP requests.
        If it doesn't, we need to use its HTTPS API, which may or may not
        require auth.
        """
        host = os.environ.get('KUBERNETES_KUBELET_HOST') or instance.get(
            "host")
        if not host:
            # if no hostname was provided, use the docker hostname if cert
            # validation is not required, the kubernetes hostname otherwise.
            docker_hostname = self.docker_util.get_hostname(
                should_resolve=True)
            if self.tls_settings.get('kubelet_verify'):
                try:
                    k8s_hostname = self.get_node_hostname(docker_hostname)
                    host = k8s_hostname or docker_hostname
                except Exception as ex:
                    log.error(str(ex))
                    host = docker_hostname
            else:
                host = docker_hostname
        try:
            # check if the no-auth endpoint is enabled
            port = instance.get('kubelet_port',
                                KubeUtil.DEFAULT_HTTP_KUBELET_PORT)
            no_auth_url = 'http://%s:%s' % (host, port)
            test_url = urljoin(no_auth_url, KubeUtil.KUBELET_HEALTH_PATH)
            self.perform_kubelet_query(test_url)
            return no_auth_url
        except Exception:
            log.debug(
                "Couldn't query kubelet over HTTP, assuming it's not in no_auth mode."
            )

        port = instance.get('kubelet_port',
                            KubeUtil.DEFAULT_HTTPS_KUBELET_PORT)

        https_url = 'https://%s:%s' % (host, port)
        test_url = urljoin(https_url, KubeUtil.KUBELET_HEALTH_PATH)
        self.perform_kubelet_query(test_url)

        return https_url

    def get_node_hostname(self, host):
        """
        Query the API server for the kubernetes hostname of the node
        using the docker hostname as a filter.
        """
        node_filter = {'labelSelector': 'kubernetes.io/hostname=%s' % host}
        node = self.retrieve_json_auth(self.kubernetes_api_url +
                                       '/nodes?%s' % urlencode(node_filter))
        if len(node['items']) != 1:
            log.error(
                'Error while getting node hostname: expected 1 node, got %s.' %
                len(node['items']))
        else:
            addresses = (node or {}).get('items',
                                         [{}])[0].get('status',
                                                      {}).get('addresses', [])
            for address in addresses:
                if address.get('type') == 'Hostname':
                    return address['address']
        return None

    def get_kube_pod_tags(self, excluded_keys=None):
        """
        Gets pods' labels as tags + creator and service tags.
        Returns a dict{namespace/podname: [tags]}
        """
        pods = self.retrieve_pods_list()
        return self.extract_kube_pod_tags(pods, excluded_keys=excluded_keys)

    def extract_kube_pod_tags(self,
                              pods_list,
                              excluded_keys=None,
                              label_prefix=None):
        """
        Extract labels + creator and service tags from a list of
        pods coming from the kubelet API.

        :param excluded_keys: labels to skip
        :param label_prefix: prefix for label->tag conversion, None defaults
        to the configuration option label_to_tag_prefix
        Returns a dict{namespace/podname: [tags]}
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        label_prefix = label_prefix or self.kube_label_prefix
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels", {})
            if name and namespace:
                key = "%s/%s" % (namespace, name)

                # Extract creator tags
                podtags = self.get_pod_creator_tags(metadata)

                # Extract services tags
                for service in self.match_services_for_pod(metadata):
                    if service is not None:
                        podtags.append(u'kube_service:%s' % service)

                # Extract labels
                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue
                    podtags.append(u"%s%s:%s" % (label_prefix, k, v))

                kube_labels[key] = podtags

        return kube_labels

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return self.perform_kubelet_query(self.pods_list_url).json()

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def get_deployment_for_replicaset(self, rs_name):
        """
        Get the deployment name for a given replicaset name
        For now, the RS name's first part is always the deployment's name, see
        https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/controller/deployment/sync.go#L299
        But it might change in a future k8s version. The other way to match RS and deployments is
        to parse and cache /apis/extensions/v1beta1/replicasets, mirroring PodServiceMapper
        """
        end = rs_name.rfind("-")
        if end > 0 and rs_name[end + 1:].isdigit():
            return rs_name[0:end]
        else:
            return None

    def perform_kubelet_query(self, url, verbose=True, timeout=10):
        """
        Perform and return a GET request against the kubelet. Supports auth and TLS validation.
        """
        tls_context = self.tls_settings

        headers = None
        cert = tls_context.get('kubelet_client_cert')
        verify = tls_context.get('kubelet_verify', DEFAULT_TLS_VERIFY)

        # if cert-based auth is enabled, don't use the token.
        if not cert and url.lower().startswith('https'):
            headers = {
                'Authorization': 'Bearer {}'.format(self.get_auth_token())
            }

        return requests.get(url,
                            timeout=timeout,
                            verify=verify,
                            cert=cert,
                            headers=headers,
                            params={'verbose': verbose})

    def retrieve_json_auth(self, url, timeout=10, verify=None, params=None):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(
                self.CA_CRT_PATH) else False
        log.debug('tls validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get(
            'bearer_token') if not cert else None
        headers = {
            'Authorization': 'Bearer {}'.format(bearer_token)
        } if bearer_token else None

        r = requests.get(url,
                         timeout=timeout,
                         headers=headers,
                         verify=verify,
                         cert=cert,
                         params=params)
        r.raise_for_status()
        return r.json()

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def _fetch_host_data(self):
        """
        Retrieve the host name and IP address from the payload returned by the
        pod-listing endpoint of the kubelet.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning(
                "Unable to retrieve pod list %s. Not fetching host data",
                str(e))
            return

        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            if name == self.host_name:
                status = pod.get('status', {})
                spec = pod.get('spec', {})
                # if not found, use an empty string - we use None as "not initialized"
                self._node_ip = status.get('hostIP', '')
                self._node_name = spec.get('nodeName', '')
                break

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' %
                        event['involvedObject'].get('kind', '').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls):
        """
        Return a string containing the authorization token for the pod.
        """
        try:
            with open(cls.AUTH_TOKEN_PATH) as f:
                return f.read()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(
                cls.AUTH_TOKEN_PATH, e))

        return None

    def check_services_cache_freshness(self):
        """
        Entry point for sd_docker_backend to check whether to invalidate the cached services
        For now, we remove the whole cache as the fill_service_cache logic
        doesn't handle partial lookups

        We use the event's resourceVersion, as using the service's version wouldn't catch deletion
        """
        return self._service_mapper.check_services_cache_freshness()

    def match_services_for_pod(self, pod_metadata, refresh=False):
        """
        Match the pods labels with services' label selectors to determine the list
        of services that point to that pod. Returns an array of service names.

        Pass refresh=True if you want to bypass the cached cid->services mapping (after a service change)
        """
        s = self._service_mapper.match_services_for_pod(pod_metadata,
                                                        refresh,
                                                        names=True)
        #log.warning("Matches for %s: %s" % (pod_metadata.get('name'), str(s)))
        return s

    def get_event_retriever(self, namespaces=None, kinds=None):
        """
        Returns a KubeEventRetriever object ready for action
        """
        return KubeEventRetriever(self, namespaces, kinds)

    def match_containers_for_pods(self, pod_uids, podlist=None):
        """
        Reads a set of pod uids and returns the set of docker
        container ids they manage.
        podlist should be a recent self.retrieve_pods_list return value;
        if not given, that method will be called.
        """
        cids = set()

        if not isinstance(pod_uids, set) or len(pod_uids) < 1:
            return cids

        if podlist is None:
            podlist = self.retrieve_pods_list()

        for pod in podlist.get('items', []):
            uid = pod.get('metadata', {}).get('uid', None)
            if uid in pod_uids:
                for container in pod.get('status',
                                         {}).get('containerStatuses', []):
                    id = container.get('containerID', "")
                    if id.startswith("docker://"):
                        cids.add(id[9:])

        return cids

    def get_pod_creator(self, pod_metadata):
        """
        Get the pod's creator from its metadata and return a
        tuple (creator_kind, creator_name).

        This allows for consistency across code paths.
        """
        try:
            created_by = json.loads(
                pod_metadata['annotations']['kubernetes.io/created-by'])
            creator_kind = created_by.get('reference', {}).get('kind')
            creator_name = created_by.get('reference', {}).get('name')
            return (creator_kind, creator_name)
        except Exception:
            log.debug('Could not parse creator for pod ' +
                      pod_metadata.get('name', ''))
            return (None, None)

    def get_pod_creator_tags(self,
                             pod_metadata,
                             legacy_rep_controller_tag=False):
        """
        Get the pod's creator from its metadata and return a list of tags
        in the form kube_$kind:$name, ready to be added to the metrics.
        """
        try:
            tags = []
            creator_kind, creator_name = self.get_pod_creator(pod_metadata)
            if creator_kind in CREATOR_KIND_TO_TAG and creator_name:
                tags.append("%s:%s" %
                            (CREATOR_KIND_TO_TAG[creator_kind], creator_name))
                if creator_kind == 'ReplicaSet':
                    deployment = self.get_deployment_for_replicaset(
                        creator_name)
                    if deployment:
                        tags.append(
                            "%s:%s" %
                            (CREATOR_KIND_TO_TAG['Deployment'], deployment))
            if legacy_rep_controller_tag and creator_kind != 'ReplicationController' and creator_name:
                tags.append(
                    'kube_replication_controller:{0}'.format(creator_name))

            return tags
        except Exception:
            log.warning('Could not parse creator tags for pod ' +
                        pod_metadata.get('name', ''))
            return []

    def process_events(self, event_array, podlist=None):
        """
        Reads a list of kube events, invalidates caches and computes a set
        of containers impacted by the changes, to refresh service discovery.
        Pod creation/deletion events are ignored for now, as docker_daemon already
        sends container creation/deletion events to SD.

        Pod->containers matching is done using match_containers_for_pods.
        """
        try:
            pods = set()
            if self._service_mapper:
                pods.update(self._service_mapper.process_events(event_array))
            return self.match_containers_for_pods(pods, podlist)
        except Exception as e:
            log.warning("Error processing events %s: %s" %
                        (str(event_array), e))
            return set()
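A minimal usage sketch tying the methods above together for service discovery (not part of the original example; kube, events and refresh_service_discovery are hypothetical names, where kube is an instance of the class above and events is a list of kube event dicts fetched through its event retriever):

podlist = kube.retrieve_pods_list()
impacted_cids = kube.process_events(events, podlist=podlist)
for cid in impacted_cids:
    # each cid is a docker container id whose pod was touched by an event,
    # so its service discovery configuration should be refreshed
    refresh_service_discovery(cid)  # hypothetical callback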
Exemple #13
0
class KubeUtil:
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    KUBELET_HEALTH_PATH = '/healthz'
    MACHINE_INFO_PATH = '/api/v1.3/machine/'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_HTTP_KUBELET_PORT = 10255
    DEFAULT_HTTPS_KUBELET_PORT = 10250
    DEFAULT_MASTER_PORT = 443
    DEFAULT_MASTER_NAME = 'kubernetes'  # DNS name to reach the master from a pod.
    DEFAULT_LABEL_PREFIX = 'kube_'
    DEFAULT_COLLECT_SERVICE_TAG = True
    CA_CRT_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'
    AUTH_TOKEN_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/token'

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"
    CONTAINER_NAME_LABEL = "io.kubernetes.container.name"

    def __init__(self, **kwargs):
        self.docker_util = DockerUtil()
        if 'init_config' in kwargs and 'instance' in kwargs:
            init_config = kwargs.get('init_config', {})
            instance = kwargs.get('instance', {})
        else:
            try:
                config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
                check_config = check_yaml(config_file_path)
                init_config = check_config['init_config'] or {}
                instance = check_config['instances'][0] or {}
            # kubernetes.yaml was not found
            except IOError as ex:
                log.error(ex.message)
                init_config, instance = {}, {}
            except Exception:
                log.error('Kubernetes configuration file is invalid. '
                          'Trying to connect to the kubelet with default settings anyway...')
                init_config, instance = {}, {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self._node_ip = self._node_name = None  # lazy evaluation
        self.host_name = os.environ.get('HOSTNAME')
        self.pod_name = os.environ.get('KUBERNETES_POD_NAME') or self.host_name
        self.tls_settings = self._init_tls_settings(instance)

        # apiserver
        if 'api_server_url' in instance:
            self.kubernetes_api_root_url = instance.get('api_server_url')
        else:
            master_host = os.environ.get('KUBERNETES_SERVICE_HOST') or self.DEFAULT_MASTER_NAME
            master_port = os.environ.get('KUBERNETES_SERVICE_PORT') or self.DEFAULT_MASTER_PORT
            self.kubernetes_api_root_url = 'https://%s:%s' % (master_host, master_port)

        self.kubernetes_api_url = '%s/api/v1' % self.kubernetes_api_root_url

        # Service mapping helper class
        self._service_mapper = PodServiceMapper(self)
        from config import _is_affirmative
        self.collect_service_tag = _is_affirmative(instance.get('collect_service_tags', KubeUtil.DEFAULT_COLLECT_SERVICE_TAG))

        # leader status triggers event collection
        self.is_leader = False
        self.leader_elector = None
        self.leader_lease_duration = instance.get('leader_lease_duration')

        # kubelet
        # If kubelet_api_url is None, init_kubelet didn't succeed yet.
        self.init_success = False
        self.kubelet_api_url = None
        self.init_retry_interval = init_config.get('init_retry_interval', DEFAULT_RETRY_INTERVAL)
        self.last_init_retry = None
        self.left_init_retries = init_config.get('init_retries', DEFAULT_INIT_RETRIES) + 1
        self.init_kubelet(instance)

        self.kube_label_prefix = instance.get('label_to_tag_prefix', KubeUtil.DEFAULT_LABEL_PREFIX)
        self.kube_node_labels = instance.get('node_labels_to_host_tags', {})

        # keep track of the latest k8s event we collected and posted
        # default value is 0 but TTL for k8s events is one hour anyways
        self.last_event_collection_ts = 0

    def _init_tls_settings(self, instance):
        """
        Initialize TLS settings for connection to apiserver and kubelet.
        """
        tls_settings = {}

        # apiserver
        client_crt = instance.get('apiserver_client_crt')
        client_key = instance.get('apiserver_client_key')
        apiserver_cacert = instance.get('apiserver_ca_cert')

        if client_crt and client_key and os.path.exists(client_crt) and os.path.exists(client_key):
            tls_settings['apiserver_client_cert'] = (client_crt, client_key)

        if apiserver_cacert and os.path.exists(apiserver_cacert):
            tls_settings['apiserver_cacert'] = apiserver_cacert

        # kubelet
        kubelet_client_crt = instance.get('kubelet_client_crt')
        kubelet_client_key = instance.get('kubelet_client_key')
        if kubelet_client_crt and kubelet_client_key and os.path.exists(kubelet_client_crt) and os.path.exists(kubelet_client_key):
            tls_settings['kubelet_client_cert'] = (kubelet_client_crt, kubelet_client_key)

        cert = instance.get('kubelet_cert')
        if cert:
            tls_settings['kubelet_verify'] = cert
        else:
            tls_settings['kubelet_verify'] = instance.get('kubelet_tls_verify', DEFAULT_TLS_VERIFY)

        if ('apiserver_client_cert' not in tls_settings) or ('kubelet_client_cert' not in tls_settings):
            # Only lookup token if we don't have client certs for both
            token = self.get_auth_token(instance)
            if token:
                tls_settings['bearer_token'] = token

        return tls_settings

    def init_kubelet(self, instance):
        """
        Handles the retry logic around _locate_kubelet.
        Once _locate_kubelet succeeds, initialize all kubelet-related
        URLs and settings.
        """
        if self.left_init_retries == 0:
            raise Exception("Kubernetes client initialization failed permanently. "
                "Kubernetes-related features will fail.")

        now = time.time()

        # last retry was less than retry_interval ago
        if self.last_init_retry and now <= self.last_init_retry + self.init_retry_interval:
            return
        # else it's the first try, or last retry was long enough ago
        self.last_init_retry = now
        self.left_init_retries -= 1

        try:
            self.kubelet_api_url = self._locate_kubelet(instance)
        except Exception as ex:
            log.error("Failed to initialize kubelet connection. Will retry %s time(s). Error: %s" % (self.left_init_retries, str(ex)))
            return
        if not self.kubelet_api_url:
            log.error("Failed to initialize kubelet connection. Will retry %s time(s)." % self.left_init_retries)
            return

        self.init_success = True

        self.kubelet_host = self.kubelet_api_url.split(':')[1].lstrip('/')
        self.pods_list_url = urljoin(self.kubelet_api_url, KubeUtil.PODS_LIST_PATH)
        self.kube_health_url = urljoin(self.kubelet_api_url, KubeUtil.KUBELET_HEALTH_PATH)

        # namespace of the agent pod
        try:
            self.self_namespace = self.get_self_namespace()
        except Exception:
            log.warning("Failed to get the agent pod namespace, defaulting to default.")
            self.self_namespace = DEFAULT_NAMESPACE

        # cadvisor
        self.cadvisor_port = instance.get('port', KubeUtil.DEFAULT_CADVISOR_PORT)
        self.cadvisor_url = '%s://%s:%d' % (self.method, self.kubelet_host, self.cadvisor_port)
        self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH)
        self.machine_info_url = urljoin(self.cadvisor_url, KubeUtil.MACHINE_INFO_PATH)

    def _locate_kubelet(self, instance):
        """
        The kubelet may or may not accept unauthenticated HTTP requests.
        If it doesn't, we need to use its HTTPS API, which may or may not
        require auth.
        Returns the kubelet URL or raises.
        """
        host = os.environ.get('KUBERNETES_KUBELET_HOST') or instance.get("host")
        if not host:
            # if no hostname was provided, use the docker hostname if cert
            # validation is not required, the kubernetes hostname otherwise.
            docker_hostname = self.docker_util.get_hostname(should_resolve=True)
            if self.tls_settings.get('kubelet_verify'):
                try:
                    k8s_hostname = self.get_node_hostname(docker_hostname)
                    host = k8s_hostname or docker_hostname
                except Exception as ex:
                    log.error(str(ex))
                    host = docker_hostname
            else:
                host = docker_hostname

        # check if the no-auth endpoint is enabled
        port = instance.get('kubelet_port', KubeUtil.DEFAULT_HTTP_KUBELET_PORT)
        no_auth_url = 'http://%s:%s' % (host, port)
        test_url = urljoin(no_auth_url, KubeUtil.KUBELET_HEALTH_PATH)
        try:
            self.perform_kubelet_query(test_url)
            return no_auth_url
        except Exception:
            log.debug("Couldn't query kubelet over HTTP, assuming it's not in no_auth mode.")

        port = instance.get('kubelet_port', KubeUtil.DEFAULT_HTTPS_KUBELET_PORT)
        https_url = 'https://%s:%s' % (host, port)
        test_url = urljoin(https_url, KubeUtil.KUBELET_HEALTH_PATH)
        try:
            self.perform_kubelet_query(test_url)
            return https_url
        except Exception as ex:
            log.warning("Couldn't query kubelet over HTTP, assuming it's not in no_auth mode.")
            raise ex

    def get_self_namespace(self):
        pods = self.retrieve_pods_list()
        for pod in pods.get('items', []):
            if pod.get('metadata', {}).get('name') == self.pod_name:
                return pod['metadata']['namespace']
        log.warning("Couldn't find the agent pod and namespace, using the default.")
        return DEFAULT_NAMESPACE

    def get_node_hostname(self, host):
        """
        Query the API server for the kubernetes hostname of the node
        using the docker hostname as a filter.
        """
        node_filter = {'labelSelector': 'kubernetes.io/hostname=%s' % host}
        node = self.retrieve_json_auth(
            self.kubernetes_api_url + '/nodes?%s' % urlencode(node_filter)
        ).json()
        if len(node['items']) != 1:
            log.error('Error while getting node hostname: expected 1 node, got %s.' % len(node['items']))
        else:
            addresses = (node or {}).get('items', [{}])[0].get('status', {}).get('addresses', [])
            for address in addresses:
                if address.get('type') == 'Hostname':
                    return address['address']
        return None

    def get_kube_pod_tags(self, excluded_keys=None):
        """
        Gets pods' labels as tags + creator and service tags.
        Returns a dict{namespace/podname: [tags]}
        """
        if not self.init_success:
            log.warning("Kubernetes client is not initialized, can't get pod tags.")
            return {}
        pods = self.retrieve_pods_list()
        return self.extract_kube_pod_tags(pods, excluded_keys=excluded_keys)

    def extract_kube_pod_tags(self, pods_list, excluded_keys=None, label_prefix=None):
        """
        Extract labels + creator and service tags from a list of
        pods coming from the kubelet API.

        :param excluded_keys: labels to skip
        :param label_prefix: prefix for the label->tag conversion; if None, defaults
            to the label_to_tag_prefix configuration option
        Returns a dict{namespace/podname: [tags]}
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        label_prefix = label_prefix or self.kube_label_prefix
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels", {})
            if name and namespace:
                key = "%s/%s" % (namespace, name)

                # Extract creator tags
                podtags = self.get_pod_creator_tags(metadata)

                # Extract services tags
                if self.collect_service_tag:
                    for service in self.match_services_for_pod(metadata):
                        if service is not None:
                            podtags.append(u'kube_service:%s' % service)

                # Extract labels
                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue
                    podtags.append(u"%s%s:%s" % (label_prefix, k, v))

                kube_labels[key] = podtags

        return kube_labels

    def retrieve_pods_list(self):
        """
        Retrieve the list of pods for this cluster querying the kubelet API.

        TODO: the list of pods could be cached with some policy to be decided.
        """
        return self.perform_kubelet_query(self.pods_list_url).json()

    def retrieve_machine_info(self):
        """
        Retrieve machine info from Cadvisor.
        """
        return retrieve_json(self.machine_info_url)

    def retrieve_metrics(self):
        """
        Retrieve metrics from Cadvisor.
        """
        return retrieve_json(self.metrics_url)

    def get_deployment_for_replicaset(self, rs_name):
        """
        Get the deployment name for a given replicaset name.
        For now, the first part of the rs name is always the deployment's name, see
        https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/controller/deployment/sync.go#L299
        but this might change in a future k8s version. The other way to match RS and deployments is
        to parse and cache /apis/extensions/v1beta1/replicasets, mirroring PodServiceMapper.
        In 1.8, the hash generation logic changed: https://github.com/kubernetes/kubernetes/pull/51538/files

        As none of these naming schemes have guaranteed suffix lengths, we have to be pretty permissive
        in what kind of suffix we match. That can lead to false positives, although their impact would
        be limited (erroneous kube_deployment tag, but the kube_replica_set tag will be present).
        For example, the hardcoded replicaset name prefix-34 or prefix-cfd will match.

        For agent6, we plan on doing this pod->replicaset->deployment matching in the cluster agent, with
        replicaset data from the apiserver. This will address that risk.
        """
        end = rs_name.rfind("-")
        if end > 0 and rs_name[end + 1:].isdigit():
            # k8s before 1.8
            return rs_name[0:end]
        if end > 0 and len(rs_name[end + 1:]) > 2:
            # k8s 1.8+ maybe? Check contents
            for char in rs_name[end + 1:]:
                if char not in ALLOWED_ENCODESTRING_ALPHANUMS:
                    return None
            return rs_name[0:end]
        else:
            return None

    def perform_kubelet_query(self, url, verbose=True, timeout=10):
        """
        Perform a GET request against the kubelet and return the response. Supports auth and TLS validation.
        """
        tls_context = self.tls_settings

        headers = None
        cert = tls_context.get('kubelet_client_cert')
        verify = tls_context.get('kubelet_verify', DEFAULT_TLS_VERIFY)

        # if cert-based auth is enabled, don't use the token.
        if not cert and url.lower().startswith('https') and 'bearer_token' in self.tls_settings:
            headers = {'Authorization': 'Bearer {}'.format(self.tls_settings.get('bearer_token'))}

        return requests.get(url, timeout=timeout, verify=verify,
            cert=cert, headers=headers, params={'verbose': verbose})

    def get_apiserver_auth_settings(self):
        """
        Kubernetes API requires authentication using a token available in
        every pod, or with a client X509 cert/key pair.
        We authenticate using the service account token by default
        and replace this behavior with cert authentication if the user provided
        a cert/key pair in the instance.

        We try to verify the server TLS cert if the public cert is available.
        """
        verify = self.tls_settings.get('apiserver_cacert')
        if not verify:
            verify = self.CA_CRT_PATH if os.path.exists(self.CA_CRT_PATH) else False
        log.debug('tls validation: {}'.format(verify))

        cert = self.tls_settings.get('apiserver_client_cert')
        bearer_token = self.tls_settings.get('bearer_token') if not cert else None
        headers = {'Authorization': 'Bearer {}'.format(bearer_token)} if bearer_token else {}
        headers['content-type'] = 'application/json'
        return cert, headers, verify

    def retrieve_json_auth(self, url, params=None, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.get(url, timeout=timeout, headers=headers, verify=verify, cert=cert, params=params)
        res.raise_for_status()
        return res

    def post_json_to_apiserver(self, url, data, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.post(url, timeout=timeout, headers=headers, verify=verify, cert=cert, data=json.dumps(data))
        res.raise_for_status()
        return res

    def put_json_to_apiserver(self, url, data, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.put(url, timeout=timeout, headers=headers, verify=verify, cert=cert, data=json.dumps(data))
        res.raise_for_status()
        return res

    def delete_to_apiserver(self, url, timeout=3):
        cert, headers, verify = self.get_apiserver_auth_settings()
        res = requests.delete(url, timeout=timeout, headers=headers, verify=verify, cert=cert)
        res.raise_for_status()
        return res

    def get_node_info(self):
        """
        Return the IP address and the hostname of the node where the pod is running.
        """
        if None in (self._node_ip, self._node_name):
            self._fetch_host_data()
        return self._node_ip, self._node_name

    def get_node_metadata(self):
        """Returns host metadata about the local k8s node"""
        meta = {}

        # API server version
        try:
            request_url = "%s/version" % self.kubernetes_api_root_url
            master_info = self.retrieve_json_auth(request_url).json()
            version = master_info.get("gitVersion")
            meta['kube_master_version'] = version[1:]
        except Exception as ex:
            # Intentional use of non-safe lookups so that the exception shows up
            # in the debug logs if parsing fails
            log.debug("Error getting Kube master version: %s" % str(ex))

        # Kubelet version & labels
        if not self.init_success:
            log.warning("Kubelet client failed to initialize, kubelet host tags will be missing for now.")
            return meta
        try:
            _, node_name = self.get_node_info()
            if not node_name:
                raise ValueError("node name missing or empty")
            request_url = "%s/nodes/%s" % (self.kubernetes_api_url, node_name)
            node_info = self.retrieve_json_auth(request_url).json()
            version = node_info.get("status").get("nodeInfo").get("kubeletVersion")
            meta['kubelet_version'] = version[1:]
        except Exception as ex:
            log.debug("Error getting Kubelet version: %s" % str(ex))

        return meta

    def get_node_hosttags(self):
        """
        Returns node labels as tags. Tag name is transformed as defined
        in node_labels_to_host_tags in the kubernetes check configuration.
        Note: queries the API server for node info. Configure RBAC accordingly.
        """
        tags = []

        try:
            _, node_name = self.get_node_info()
            if not node_name:
                raise ValueError("node name missing or empty")

            request_url = "%s/nodes/%s" % (self.kubernetes_api_url, node_name)
            node_info = self.retrieve_json_auth(request_url).json()
            node_labels = node_info.get('metadata', {}).get('labels', {})

            for l_name, t_name in self.kube_node_labels.iteritems():
                if l_name in node_labels:
                    tags.append('%s:%s' % (t_name, node_labels[l_name]))

        except Exception as ex:
            log.debug("Error getting node labels: %s" % str(ex))

        return tags

    def _fetch_host_data(self):
        """
        Retrieve the host name and IP address from the pod list payload
        returned by the kubelet.

        The host IP address is different from the default router for the pod.
        """
        try:
            pod_items = self.retrieve_pods_list().get("items") or []
        except Exception as e:
            log.warning("Unable to retrieve pod list %s. Not fetching host data", str(e))
            return

        # Take the first pod that carries the fields we need:
        # all running pods have '.spec.nodeName' set,
        # but static pods don't have '.status.hostIP'
        for pod in pod_items:
            node_name = pod.get('spec', {}).get('nodeName', '')
            if not self._node_name and node_name:
                self._node_name = node_name

            # hostIP is not filled in on static pods
            host_ip = pod.get('status', {}).get('hostIP', '')
            if not self._node_ip and host_ip:
                self._node_ip = host_ip

            if self._node_name and self._node_ip:
                return

        log.warning("Cannot set both node_name: '%s' and node_ip: '%s' from PodList with %d items",
                    self._node_name, self._node_ip, len(pod_items))

    def extract_event_tags(self, event):
        """
        Return a list of tags extracted from an event object
        """
        tags = []

        if 'reason' in event:
            tags.append('reason:%s' % event.get('reason', '').lower())
        if 'namespace' in event.get('metadata', {}):
            tags.append('namespace:%s' % event['metadata']['namespace'])
        if 'host' in event.get('source', {}):
            tags.append('node_name:%s' % event['source']['host'])
        if 'kind' in event.get('involvedObject', {}):
            tags.append('object_type:%s' % event['involvedObject'].get('kind', '').lower())
        if 'name' in event.get('involvedObject', {}):
            tags.append('object_name:%s' % event['involvedObject'].get('name','').lower())
        if 'component' in event.get('source', {}):
            tags.append('source_component:%s' % event['source'].get('component','').lower())

        return tags

    def are_tags_filtered(self, tags):
        """
        Because it is a pain to call it from the kubernetes check otherwise.
        """
        return self.docker_util.are_tags_filtered(tags)

    @classmethod
    def get_auth_token(cls, instance):
        """
        Return a string containing the authorization token for the pod.
        """

        token_path = instance.get('bearer_token_path', cls.AUTH_TOKEN_PATH)
        try:
            with open(token_path) as f:
                return f.read().strip()
        except IOError as e:
            log.error('Unable to read token from {}: {}'.format(token_path, e))

        return None

    def match_services_for_pod(self, pod_metadata, refresh=False):
        """
        Match the pod's labels against services' label selectors to determine the list
        of services that point to that pod. Returns a list of service names.

        Pass refresh=True to bypass the cached cid->services mapping (e.g. after a service change).
        """
        s = self._service_mapper.match_services_for_pod(pod_metadata, refresh, names=True)
        #log.warning("Matches for %s: %s" % (pod_metadata.get('name'), str(s)))
        return s

    def get_event_retriever(self, namespaces=None, kinds=None, delay=None):
        """
        Returns a KubeEventRetriever object ready for action
        """
        return KubeEventRetriever(self, namespaces, kinds, delay)

    def match_containers_for_pods(self, pod_uids, podlist=None):
        """
        Reads a set of pod uids and returns the set of docker
        container ids they manage.
        podlist should be a recent self.retrieve_pods_list return value;
        if not given, that method will be called.
        """
        cids = set()

        if not isinstance(pod_uids, set) or len(pod_uids) < 1:
            return cids

        if podlist is None:
            podlist = self.retrieve_pods_list()

        for pod in podlist.get('items', []):
            uid = pod.get('metadata', {}).get('uid')
            if uid in pod_uids:
                # default to an empty list: pending pods may have no containerStatuses
                for container in pod.get('status', {}).get('containerStatuses', []):
                    c_id = container.get('containerID', "")
                    if c_id.startswith("docker://"):
                        # strip the "docker://" prefix to keep the bare container id
                        cids.add(c_id[9:])

        return cids

    def get_pod_creator(self, pod_metadata):
        """
        Get the pod's creator from its metadata and return a
        (creator_kind, creator_name) tuple.

        This allows for consistency across code paths.
        """
        try:
            owner_references_entry = pod_metadata['ownerReferences'][0]
            creator_kind = owner_references_entry['kind']
            creator_name = owner_references_entry['name']
            return creator_kind, creator_name
        except LookupError as e:
            try:
                log.debug('Could not parse creator for pod %s through `OwnerReferences`, falling back to annotation: %s',
                          pod_metadata.get('name', ''), type(e))
                created_by = json.loads(pod_metadata['annotations']['kubernetes.io/created-by'])
                creator_kind = created_by.get('reference', {}).get('kind')
                creator_name = created_by.get('reference', {}).get('name')
                return creator_kind, creator_name
            except Exception as e:
                log.debug('Could not parse creator for pod %s: %s', pod_metadata.get('name', ''), type(e))
                return None, None

    def get_pod_creator_tags(self, pod_metadata, legacy_rep_controller_tag=False):
        """
        Get the pod's creator from its metadata and return a list of tags
        in the form kube_$kind:$name, ready to be added to the metrics.
        """
        try:
            tags = []
            creator_kind, creator_name = self.get_pod_creator(pod_metadata)
            if creator_kind in CREATOR_KIND_TO_TAG and creator_name:
                tags.append("%s:%s" % (CREATOR_KIND_TO_TAG[creator_kind], creator_name))
                if creator_kind == 'ReplicaSet':
                    deployment = self.get_deployment_for_replicaset(creator_name)
                    if deployment:
                        tags.append("%s:%s" % (CREATOR_KIND_TO_TAG['Deployment'], deployment))
            if legacy_rep_controller_tag and creator_kind != 'ReplicationController' and creator_name:
                tags.append('kube_replication_controller:{0}'.format(creator_name))

            return tags
        except Exception:
            log.warning('Could not parse creator tags for pod ' + pod_metadata.get('name', ''))
            return []

    def process_events(self, event_array, podlist=None):
        """
        Reads a list of kube events, invalidates caches and computes a set
        of containers impacted by the changes, to refresh service discovery.
        Pod creation/deletion events are ignored for now, as docker_daemon already
        sends container creation/deletion events to SD.

        Pod->containers matching is done using match_containers_for_pods.
        """
        try:
            pods = set()
            if self._service_mapper:
                pods.update(self._service_mapper.process_events(event_array))
            return self.match_containers_for_pods(pods, podlist)
        except Exception as e:
            log.warning("Error processing events %s: %s" % (str(event_array), e))
            return set()

    def refresh_leader(self):
        if not self.init_success:
            log.warning("Kubelet client is not initialized, leader election is disabled.")
            return
        if not self.leader_elector:
            self.leader_elector = LeaderElector(self)
        self.leader_elector.try_acquire_or_refresh()

    def image_name_resolver(self, image):
        """
        Wraps around the sibling dockerutil method and catches exceptions
        """
        if image is None:
            return None
        try:
            return self.docker_util.image_name_resolver(image)
        except Exception as e:
            log.warning("Error resolving image name: %s", str(e))
            return image
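A minimal usage sketch of the class above (not part of the original example; it assumes a reachable kubelet and a valid kubernetes.yaml, and relies on the Singleton metaclass so repeated instantiation returns the same object):

kube = KubeUtil()
if kube.init_success:
    node_ip, node_name = kube.get_node_info()
    pod_tags = kube.get_kube_pod_tags()   # {"namespace/podname": [tags]}
    host_tags = kube.get_node_hosttags()  # node labels mapped to host tags
else:
    # the kubelet connection could not be established yet;
    # init_kubelet() can be retried later with the instance configuration
    pass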
Exemple #14
0
class KubeUtil():
    __metaclass__ = Singleton

    DEFAULT_METHOD = 'http'
    METRICS_PATH = '/api/v1.3/subcontainers/'
    PODS_LIST_PATH = '/pods/'
    DEFAULT_CADVISOR_PORT = 4194
    DEFAULT_KUBELET_PORT = 10255
    DEFAULT_MASTER_PORT = 8080

    POD_NAME_LABEL = "io.kubernetes.pod.name"
    NAMESPACE_LABEL = "io.kubernetes.pod.namespace"

    def __init__(self):
        self.docker_util = DockerUtil()
        try:
            config_file_path = get_conf_path(KUBERNETES_CHECK_NAME)
            check_config = check_yaml(config_file_path)
            instance = check_config['instances'][0]
        # kubernetes.yaml was not found
        except IOError as ex:
            log.error(ex.message)
            instance = {}
        except Exception:
            log.error(
                'Kubernetes configuration file is invalid. '
                'Trying to connect to the kubelet with default settings anyway...')
            instance = {}

        self.method = instance.get('method', KubeUtil.DEFAULT_METHOD)
        self.host = instance.get("host") or self.docker_util.get_hostname()

        self.cadvisor_port = instance.get('port',
                                          KubeUtil.DEFAULT_CADVISOR_PORT)
        self.kubelet_port = instance.get('kubelet_port',
                                         KubeUtil.DEFAULT_KUBELET_PORT)

        self.metrics_url = urljoin(
            '%s://%s:%d' % (self.method, self.host, self.cadvisor_port),
            KubeUtil.METRICS_PATH)
        self.pods_list_url = urljoin(
            '%s://%s:%d' % (self.method, self.host, self.kubelet_port),
            KubeUtil.PODS_LIST_PATH)

        self.kube_health_url = '%s://%s:%d/healthz' % (self.method, self.host,
                                                       self.kubelet_port)

    def get_kube_labels(self, excluded_keys=None):
        pods = retrieve_json(self.pods_list_url)
        return self.extract_kube_labels(pods, excluded_keys=excluded_keys)

    def extract_kube_labels(self, pods_list, excluded_keys=None):
        """
        Extract labels from a list of pods coming from
        the kubelet API.
        """
        excluded_keys = excluded_keys or []
        kube_labels = defaultdict(list)
        pod_items = pods_list.get("items") or []
        for pod in pod_items:
            metadata = pod.get("metadata", {})
            name = metadata.get("name")
            namespace = metadata.get("namespace")
            labels = metadata.get("labels")
            if name and labels and namespace:
                key = "%s/%s" % (namespace, name)

                for k, v in labels.iteritems():
                    if k in excluded_keys:
                        continue

                    kube_labels[key].append(u"kube_%s:%s" % (k, v))

        return kube_labels

    def retrieve_pods_list(self):
        return retrieve_json(self.pods_list_url)
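The older variant above exposes a much smaller surface, with no apiserver client, TLS handling or retry logic; a sketch of how it might be used (not part of the original example; the excluded label name is only an illustration):

kube = KubeUtil()
pods = kube.retrieve_pods_list()
labels = kube.extract_kube_labels(pods, excluded_keys=['pod-template-hash'])
for pod_key, tags in labels.items():
    # pod_key is "namespace/podname", tags are "kube_<label>:<value>" strings
    print(pod_key, tags)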