Code example #1
def agent_container_inspect():
    # Self inspection based on cgroups
    # On all platforms, the container ID is the last part of the path.
    REGEX_PATTERN = '(.*/)+([a-z0-9]{64})$'

    dockerutil = DockerUtil()
    cgroup_path = '/proc/self/cgroup'
    container_id = None

    with open(cgroup_path, 'r') as f:
        for ind in f:
            id_match = re.search(REGEX_PATTERN, ind)
            if id_match:
                container_id = id_match.group(2)
                break
    if container_id is None:
        print(
            "The container_id could not be found. Refer to the docker log of the container running the agent"
        )
        return 1
    try:
        inspect = dockerutil.inspect_container(container_id)
        key_indices = [
            i for i, k in enumerate(inspect['Config']['Env']) if 'API_KEY' in k
        ]
        for ind in key_indices:
            inspect['Config']['Env'][ind] = '%s=%s' % (
                inspect['Config']['Env'][ind].split('=', 1)[0], 'redacted')
        print json.dumps(inspect, indent=4)
        return 0
    except Exception as e:
        print "Could not inspect container: %s" % e
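
The self-inspection above works by scanning /proc/self/cgroup for a path whose last component is a 64-character hex container ID. A minimal standalone sketch of just that parsing step, using the same regular expression and a made-up sample line (no DockerUtil required):

import re

# Same pattern as the example above: the container ID is the 64-hex-char
# last component of a cgroup path.
CONTAINER_ID_RE = re.compile(r'(.*/)+([a-z0-9]{64})$')

def find_container_id(cgroup_lines):
    """Return the first 64-character container ID found in cgroup lines, or None."""
    for line in cgroup_lines:
        match = CONTAINER_ID_RE.search(line.strip())
        if match:
            return match.group(2)
    return None

if __name__ == '__main__':
    # Hypothetical cgroup line; on a real host you would iterate over
    # open('/proc/self/cgroup') instead.
    sample = ['12:cpuset:/docker/' + 'a' * 64]
    print(find_container_id(sample))
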
Code example #2
File: configcheck.py  Project: serverdensity/sd-agent
def agent_container_inspect():
    # Self inspection based on cgroups
    # On all platforms, the container ID is the last part of the path.
    REGEX_PATTERN = '(.*/)+([a-z0-9]{64})$'

    dockerutil = DockerUtil()
    cgroup_path = '/proc/self/cgroup'
    container_id = None

    with open(cgroup_path, 'r') as f:
        for ind in f:
            id_match = re.search(REGEX_PATTERN, ind)
            if id_match:
                container_id = id_match.group(2)
                break
    if container_id is None:
        print("The container_id could not be found. Refer to the docker log of the container running the agent")
        return 1
    try:
        inspect = dockerutil.inspect_container(container_id)
        key_indices = [i for i, k in enumerate(inspect['Config']['Env']) if 'API_KEY' in k]
        for ind in key_indices:
            inspect['Config']['Env'][ind] = '%s=%s' % (inspect['Config']['Env'][ind].split('=', 1)[0], 'redacted')
        print json.dumps(inspect, indent=4)
        return 0
    except Exception as e:
        print "Could not inspect container: %s" % e
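
Both variants of agent_container_inspect redact any environment entry containing API_KEY before printing the inspect payload. A short sketch of that redaction step on its own, applied to a hypothetical Env list:

def redact_api_keys(env_list):
    """Return a copy of a docker inspect Env list with API_KEY values masked."""
    redacted = []
    for entry in env_list:
        if 'API_KEY' in entry:
            # Keep the variable name, replace everything after the first '='.
            redacted.append('%s=%s' % (entry.split('=', 1)[0], 'redacted'))
        else:
            redacted.append(entry)
    return redacted

if __name__ == '__main__':
    # Hypothetical Env section of a docker inspect payload.
    env = ['PATH=/usr/bin', 'DD_API_KEY=0123456789abcdef']
    print(redact_api_keys(env))  # ['PATH=/usr/bin', 'DD_API_KEY=redacted']
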
Code example #3
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        self.VAR_MAPPING = {
            'host': self._get_host_address,
            'port': self._get_port,
            'tags': self._get_additional_tags,
        }

        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host_address(self, c_inspect, tpl_var):
        """Extract the container IP from a docker inspect object, or the kubelet API."""
        c_id, c_img = c_inspect.get('Id', ''), c_inspect.get('Config', {}).get('Image', '')
        tpl_parts = tpl_var.split('_')

        # a specifier was given
        if len(tpl_parts) > 1:
            networks = c_inspect.get('NetworkSettings', {}).get('Networks') or {}
            ip_dict = {}
            for net_name, net_desc in networks.iteritems():
                ip = net_desc.get('IPAddress')
                if ip:
                    ip_dict[net_name] = ip
            ip_addr = self._extract_ip_from_networks(ip_dict, tpl_var)
            if ip_addr:
                return ip_addr

        # try to get the bridge IP address
        log.debug("No network found for container %s (%s), trying with IPAddress field" % (c_id[:12], c_img))
        ip_addr = c_inspect.get('NetworkSettings', {}).get('IPAddress')
        if ip_addr:
            return ip_addr

        if is_k8s():
            # kubernetes case
            log.debug("Couldn't find the IP address for container %s (%s), "
                      "using the kubernetes way." % (c_id[:12], c_img))
            pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
            for pod in pod_list:
                pod_ip = pod.get('status', {}).get('podIP')
                if pod_ip is None:
                    continue
                else:
                    c_statuses = pod.get('status', {}).get('containerStatuses', [])
                    for status in c_statuses:
                        # compare the container id with those of containers in the current pod
                        if c_id == status.get('containerID', '').split('//')[-1]:
                            return pod_ip

        log.error("No IP address was found for container %s (%s)" % (c_id[:12], c_img))
        return None

    def _extract_ip_from_networks(self, ip_dict, tpl_var):
        """Extract a single IP from a dictionary made of network names and IPs."""
        if not ip_dict:
            return None
        tpl_parts = tpl_var.split('_')

        # no specifier
        if len(tpl_parts) < 2:
            log.warning("No key was passed for template variable %s." % tpl_var)
            return self._get_fallback_ip(ip_dict)
        else:
            res = ip_dict.get(tpl_parts[-1])
            if res is None:
                log.warning("The key passed for template variable %s was not found." % tpl_var)
                return self._get_fallback_ip(ip_dict)
            else:
                return res

    def _get_fallback_ip(self, ip_dict):
        """try to pick the bridge key, falls back to the value of the last key"""
        if 'bridge' in ip_dict:
            log.warning("Using the bridge network.")
            return ip_dict['bridge']
        else:
            last_key = sorted(ip_dict.iterkeys())[-1]
            log.warning("Trying with the last key: '%s'." % last_key)
            return ip_dict[last_key]

    def _get_port(self, container_inspect, tpl_var):
        """Extract a port from a container_inspect or the k8s API given a template variable."""
        c_id = container_inspect.get('Id', '')

        try:
            ports = map(lambda x: x.split('/')[0], container_inspect['NetworkSettings']['Ports'].keys())
        except (IndexError, KeyError, AttributeError):
            # try to get ports from the docker API. Works if the image has an EXPOSE instruction
            ports = map(lambda x: x.split('/')[0], container_inspect['Config'].get('ExposedPorts', {}).keys())

            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                log.debug("Didn't find the port for container %s (%s), trying the kubernetes way." %
                          (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
                co_statuses = self._get_kube_config(c_id, 'status').get('containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = self._get_kube_config(c_id, 'spec').get('containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = map(lambda x: str(x.get('containerPort')), co.get('ports', []))
        ports = sorted(ports, key=lambda x: int(x))
        return self._extract_port_from_list(ports, tpl_var)

    def _extract_port_from_list(self, ports, tpl_var):
        if not ports:
            return None

        tpl_parts = tpl_var.split('_')

        if len(tpl_parts) == 1:
            log.debug("No index was passed for template variable %s. "
                      "Trying with the last element." % tpl_var)
            return ports[-1]

        try:
            idx = tpl_parts[-1]
            return ports[int(idx)]
        except ValueError:
            log.error("Port index is not an integer. Using the last element instead.")
        except IndexError:
            log.error("Port index is out of range. Using the last element instead.")
        return ports[-1]

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'), 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.iteritems():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(pod_metadata.get('annotations', {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference', {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s' % created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect, *args):
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(container_inspect.get('Id'), 'metadata')
            pod_spec = self._get_kube_config(container_inspect.get('Id'), 'spec')
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or pod spec for container %s."
                            " Additional Kubernetes tags may be missing." % container_inspect.get('Id', '')[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API"""
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
        containers = [(
            container.get('Image'),
            container.get('Id'), container.get('Labels')
        ) for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                # value of the DATADOG_ID tag or the image name if the label is missing
                identifier = self.get_config_id(image, labels)
                check_configs = self._get_check_configs(cid, identifier, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config, [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception('Building config for container %s based on image %s using service '
                              'discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def get_config_id(self, image, labels):
        """Look for a DATADOG_ID label, return its value or the image name if missing"""
        return labels.get(DATADOG_ID) or image

    def _get_check_configs(self, c_id, identifier, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags."""
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(identifier, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with identifier %s. '
                      'It will be left unconfigured.' % (c_id[:12], identifier))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                init_config, instance = tpl
                if trace_config:
                    check_configs.append((source, (check_name, init_config, instance)))
                else:
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, identifier, trace_config=False):
        """Extract config templates for an identifier from a K/V store and return them as a list."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning('No supported configuration backend was provided, using auto-config only.')
        else:
            auto_conf = False

        # format: [('ident', {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, ('ident', {init_tpl}, {instance_tpl}))] with trace_config
        raw_tpls = self.config_store.get_check_tpls(
            identifier, auto_conf=auto_conf, trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug('No template was found for identifier %s, leaving it alone.' % identifier)
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = map(lambda x: x.strip('%'), variables)
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception('Failed to decode the JSON template fetched for check {0}. Its configuration'
                              ' by service discovery failed for ident  {1}.'.format(check_name, identifier))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))
            else:
                templates.append((check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a
           dict from template variable names and their values."""
        var_values = {}
        c_id, c_image = inspect.get('Id', ''), inspect.get('Config', {}).get('Image', '')

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags', [])
            tags += tpl_tags if isinstance(tpl_tags, list) else [tpl_tags]
            instance_tpl['tags'] = list(set(tags))

        for var in variables:
            # variables can be suffixed with an index in case several values are found
            if var.split('_')[0] in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var.split('_')[0]](inspect, var)
                    if res is None:
                        raise ValueError("Invalid value for variable %s." % var)
                    var_values[var] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s for container %s "
                              "(%s): %s" % (var, c_id[:12], c_image, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s for container %s "
                          "(%s)." % (var, c_id[:12], c_image))

        return instance_tpl, var_values
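
SDDockerBackend resolves template variables such as %%host%%, %%port%% or %%host_bridge%% through the VAR_MAPPING handlers and then substitutes the values into the instance template. A minimal sketch of the extraction and substitution steps, assuming a %%var%% placeholder syntax (the real PLACEHOLDER_REGEX and _render_template come from AbstractSDBackend, which is not shown here):

import re

# Assumed placeholder syntax; stands in for AbstractSDBackend.PLACEHOLDER_REGEX.
PLACEHOLDER_REGEX = re.compile(r'%%[^%]+%%')

def extract_variables(template):
    """List the variable names referenced in a template, mirroring _get_config_templates."""
    return [match.strip('%') for match in PLACEHOLDER_REGEX.findall(str(template))]

def fill_template(instance_tpl, var_values):
    """Replace %%var%% placeholders in string values with their resolved values."""
    filled = {}
    for key, value in instance_tpl.items():
        if isinstance(value, str):
            for var, resolved in var_values.items():
                value = value.replace('%%' + var + '%%', str(resolved))
        filled[key] = value
    return filled

if __name__ == '__main__':
    # Hypothetical template; the values stand in for what _get_host_address
    # and _get_port would resolve from a docker inspect payload.
    tpl = {'host': '%%host_bridge%%', 'port': '%%port%%', 'tags': ['env:dev']}
    print(extract_variables(tpl))
    print(fill_template(tpl, {'host_bridge': '172.17.0.2', 'port': '6379'}))
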
Code example #4
File: ecsutil.py  Project: netsil/dd-agent
class ECSUtil:
    __metaclass__ = Singleton

    def __init__(self):
        self.docker_util = DockerUtil()
        self.ecs_agent_local = None

        self.ecs_tags = {}
        self._populate_ecs_tags()

    def _get_ecs_address(self):
        """Detect how to connect to the ecs-agent"""
        ecs_config = self.docker_util.inspect_container('ecs-agent')
        ip = ecs_config.get('NetworkSettings', {}).get('IPAddress')
        ports = ecs_config.get('NetworkSettings', {}).get('Ports')
        port = ports.keys()[0].split('/')[0] if ports else None
        if not ip:
            port = ECS_INTROSPECT_DEFAULT_PORT
            if self._is_ecs_agent_local():
                ip = "localhost"
            elif Platform.is_containerized() and self.docker_gateway:
                ip = self.docker_gateway
            else:
                raise Exception("Unable to determine ecs-agent IP address")

        return ip, port

    def _populate_ecs_tags(self, skip_known=False):
        """
        Populate the cache of ECS tags. Can be called with skip_known=True
        when we only want to pick up new containers quickly (a single task API
        call), for example because we detected that a new task started.
        """
        try:
            ip, port = self._get_ecs_address()
        except Exception as ex:
            log.warning("Failed to connect to ecs-agent, skipping task tagging: %s" % ex)
            return

        try:
            tasks = requests.get('http://%s:%s/v1/tasks' % (ip, port)).json()
            for task in tasks.get('Tasks', []):
                for container in task.get('Containers', []):
                    cid = container['DockerId']

                    if skip_known and cid in self.ecs_tags:
                        continue

                    tags = ['task_name:%s' % task['Family'], 'task_version:%s' % task['Version']]
                    self.ecs_tags[container['DockerId']] = tags
        except requests.exceptions.HTTPError as ex:
            log.warning("Unable to collect ECS task names: %s" % ex)

    def _get_container_tags(self, cid):
        """
        This method triggers a fast fill of the tag cache (useful when a new task starts
        and we want the new containers to be cached with a single api call) and returns
        the tags (or an empty list) from the fresh cache.
        """
        self._populate_ecs_tags(skip_known=True)

        if cid in self.ecs_tags:
            return self.ecs_tags[cid]
        else:
            log.debug("Container %s doesn't seem to be an ECS task, skipping." % cid[:12])
            self.ecs_tags[cid] = []
        return []

    def _is_ecs_agent_local(self):
        """Return True if we can reach the ecs-agent over localhost, False otherwise.
        This is needed because if the ecs-agent is started with --net=host it won't have an IP address attached.
        """
        if self.ecs_agent_local is not None:
            return self.ecs_agent_local

        self.ecs_agent_local = False
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(5)
        try:
            result = sock.connect_ex(('localhost', ECS_INTROSPECT_DEFAULT_PORT))
        except Exception as e:
            log.debug("Unable to connect to ecs-agent. Exception: {0}".format(e))
        else:
            if result == 0:
                self.ecs_agent_local = True
            else:
                log.debug("ecs-agent is not available locally, encountered error code: {0}".format(result))
        sock.close()
        return self.ecs_agent_local

    def extract_container_tags(self, co):
        """
        Queries the ecs-agent to get ECS tags (task and task version) for a container.
        As this is expensive, it is cached in the self.ecs_tags dict.
        The cache invalidation goes through invalidate_ecs_cache, called by the docker_daemon check

        :param co: container dict returned by docker-py
        :return: tags as list<string>, cached
        """
        co_id = co.get('Id', None)

        if co_id is None:
            log.warning("Invalid container object in extract_container_tags")
            return []

        if co_id in self.ecs_tags:
            return self.ecs_tags[co_id]
        else:
            return self._get_container_tags(co_id)

    def invalidate_cache(self, events):
        """
        Allows cache invalidation when containers die
        :param events from self.get_events
        """
        try:
            for ev in events:
                if ev.get('status') == 'die' and ev.get('id') in self.ecs_tags:
                    del self.ecs_tags[ev.get('id')]
        except Exception as e:
            log.warning("Error when invalidating ecs cache: " + str(e))
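
ECSUtil builds its tag cache from the ecs-agent introspection endpoint (/v1/tasks), mapping each container's DockerId to task_name and task_version tags. A standalone sketch of that mapping, assuming a reachable ecs-agent on the default introspection port (it only returns data on an ECS host):

import requests

# Stands in for ECS_INTROSPECT_DEFAULT_PORT above; 51678 is the ecs-agent default.
ECS_INTROSPECT_DEFAULT_PORT = 51678

def fetch_ecs_tags(ip='localhost', port=ECS_INTROSPECT_DEFAULT_PORT):
    """Map container IDs to task_name/task_version tags via the ecs-agent introspection API."""
    tasks = requests.get('http://%s:%s/v1/tasks' % (ip, port), timeout=3).json()
    tags_by_cid = {}
    for task in tasks.get('Tasks', []):
        for container in task.get('Containers', []):
            tags_by_cid[container['DockerId']] = [
                'task_name:%s' % task['Family'],
                'task_version:%s' % task['Version'],
            ]
    return tags_by_cid

if __name__ == '__main__':
    print(fetch_ecs_tags())
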
Code example #5
class BaseUtil:
    """
    Base class for orchestrator utils. Only handles container tags for now.
    Users should go through the orchestrator.Tagger class to simplify the code

    Children classes can implement:
      - __init__: to change self.needs_inspect
      - _get_cacheable_tags: tags will be cached for reuse
      - _get_transient_tags: tags can change and won't be cached (TODO)
      - invalidate_cache: custom cache invalidation logic
      - is_detected (staticmethod)
    """
    __metaclass__ = Singleton

    def __init__(self):
        # Whether your get___tags methods need the Config section inspect data
        self.needs_inspect_config = False
        # Whether your get___tags methods need the Labels section inspect data
        self.needs_inspect_labels = False

        self.log = logging.getLogger(__name__)
        self.docker_util = DockerUtil()

        # Tags cache as a dict {co_id: [tags]}
        self._container_tags_cache = {}

    def get_container_tags(self, cid=None, co=None):
        """
        Returns container tags for the given container, inspecting the container if needed
        :param cid: the container id, if no container dict is passed
        :param co: the container dict returned by docker-py, if no id is passed
        :return: tags as list<string>, cached
        """

        if (cid is not None) and (co is not None):
            self.log.error(
                "Can only pass either a container id or object, not both, returning empty tags"
            )
            return []
        if (cid is None) and (co is None):
            self.log.error(
                "Need one container id or container object, returning empty tags"
            )
            return []
        elif co is not None:
            if 'Id' in co:
                cid = co.get('Id')
            else:
                self.log.warning(
                    "Invalid container dict, returning empty tags")
                return []

        if cid in self._container_tags_cache:
            return self._container_tags_cache[cid]
        else:
            if self.needs_inspect_config and (co is None
                                              or 'Config' not in co):
                co = self.docker_util.inspect_container(cid)
            if self.needs_inspect_labels and (co is None
                                              or 'Labels' not in co):
                co = self.docker_util.inspect_container(cid)

            self._container_tags_cache[cid] = self._get_cacheable_tags(cid, co)
            return self._container_tags_cache[cid]

    def invalidate_cache(self, events):
        """
        Allows cache invalidation when containers die
        :param events from self.get_events
        """
        try:
            for ev in events:
                if ev.get('status') == 'die' and ev.get(
                        'id') in self._container_tags_cache:
                    del self._container_tags_cache[ev.get('id')]
        except Exception as e:
            self.log.warning("Error when invalidating tag cache: " + str(e))

    def reset_cache(self):
        """
        Empties all caches to reset the singleton to initial state
        """
        self._container_tags_cache = {}

    # Util methods for children classes

    def _try_urls(self, urls, validation_lambda=None, timeout=1):
        """
        When detecting orchestrator agents, one might need to try several IPs
        before finding the good one.
        The first url returning a 200 and validating the lambda will be returned.
        If no lambda is provided, the first url to return a 200 is returned.
        :param urls: list of urls to try
        :param validation_lambda: lambda to return a boolean from a Request.Response
        :return: first url matching, or None
        """
        if not urls:
            return None

        for url in urls:
            try:
                response = requests.get(url, timeout=timeout)
                if response.status_code != requests.codes.ok:
                    continue
                if validation_lambda and not validation_lambda(response):
                    continue
                return url
            except requests.exceptions.RequestException:  # Network
                continue
            except ValueError:  # JSON parsing or dict search
                continue
            except TypeError:  # NoneType errors
                continue

        return None
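
BaseUtil._try_urls probes a list of candidate endpoints and keeps the first one that answers 200 OK and passes an optional validation lambda. A short standalone version with a hypothetical candidate list and a JSON-shape check:

import requests

def try_urls(urls, validation_lambda=None, timeout=1):
    """Return the first URL that answers 200 OK and passes the validation lambda, else None."""
    for url in urls or []:
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != requests.codes.ok:
                continue
            if validation_lambda and not validation_lambda(response):
                continue
            return url
        except (requests.exceptions.RequestException, ValueError, TypeError):
            # network error, JSON decoding error or unexpected payload: try the next one
            continue
    return None

if __name__ == '__main__':
    # Hypothetical candidate addresses for an orchestrator agent API.
    candidates = ['http://localhost:51678/v1/metadata', 'http://172.17.0.1:51678/v1/metadata']
    # Keep the first endpoint whose JSON body contains a 'Cluster' key.
    print(try_urls(candidates, validation_lambda=lambda r: 'Cluster' in r.json()))
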
Code example #6
File: nomadutil.py  Project: netsil/dd-agent
class NomadUtil:
    __metaclass__ = Singleton

    def __init__(self):
        self.docker_util = DockerUtil()

        # Tags cache as a dict {co_id: (create_timestamp, [tags])}
        self._container_tags_cache = {}

    def extract_container_tags(self, co):
        """
        Queries docker inspect to get nomad tags in the container's environment vars.
        As this is expensive, it is cached in the self._nomad_tags_cache dict.
        The cache invalidation goes through invalidate_nomad_cache, called by the docker_daemon check

        :param co: container dict returned by docker-py
        :return: tags as list<string>, cached
        """

        co_id = co.get('Id', None)

        if co_id is None:
            log.warning("Invalid container object in extract_container_tags")
            return

        # Cache lookup on Id, verified on Created timestamp
        if co_id in self._container_tags_cache:
            created, tags = self._container_tags_cache[co_id]
            if created == co.get('Created', -1):
                return tags

        tags = []
        try:
            inspect_info = self.docker_util.inspect_container(co_id)
            envvars = inspect_info.get('Config', {}).get('Env', {})
            for var in envvars:
                if var.startswith(NOMAD_TASK_NAME):
                    tags.append('nomad_task:%s' % var[len(NOMAD_TASK_NAME) + 1:])
                elif var.startswith(NOMAD_JOB_NAME):
                    tags.append('nomad_job:%s' % var[len(NOMAD_JOB_NAME) + 1:])
                elif var.startswith(NOMAD_ALLOC_NAME):
                    try:
                        start = var.index('.', len(NOMAD_ALLOC_NAME)) + 1
                        end = var.index('[')
                        if end <= start:
                            raise ValueError("Error extracting group from %s, check format" % var)
                        tags.append('nomad_group:%s' % var[start:end])
                    except ValueError:
                        pass
                    self._container_tags_cache[co_id] = (co.get('Created'), tags)
        except Exception as e:
            log.warning("Error while parsing Nomad tags: %s" % str(e))
        finally:
            return tags

    def invalidate_cache(self, events):
        """
        Allows cache invalidation when containers die
        :param events from self.get_events
        """
        try:
            for ev in events:
                if ev.get('status') == 'die' and ev.get('id') in self._container_tags_cache:
                    del self._container_tags_cache[ev.get('id')]
        except Exception as e:
            log.warning("Error when invalidating nomad cache: " + str(e))
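
NomadUtil recovers its tags from environment variables that the Nomad executor injects into the container (NOMAD_TASK_NAME, NOMAD_JOB_NAME, NOMAD_ALLOC_NAME). A standalone sketch of that parsing; the constants are defined elsewhere in nomadutil.py and are spelled out here for the sake of the example:

NOMAD_TASK_NAME = 'NOMAD_TASK_NAME'
NOMAD_JOB_NAME = 'NOMAD_JOB_NAME'
NOMAD_ALLOC_NAME = 'NOMAD_ALLOC_NAME'

def nomad_tags_from_env(envvars):
    """Build nomad_task / nomad_job / nomad_group tags from a container's Env list."""
    tags = []
    for var in envvars:
        if var.startswith(NOMAD_TASK_NAME):
            tags.append('nomad_task:%s' % var[len(NOMAD_TASK_NAME) + 1:])
        elif var.startswith(NOMAD_JOB_NAME):
            tags.append('nomad_job:%s' % var[len(NOMAD_JOB_NAME) + 1:])
        elif var.startswith(NOMAD_ALLOC_NAME):
            # Alloc names look like "<job>.<group>[<index>]"; the group name sits
            # between the first '.' after the variable name and the '['.
            try:
                start = var.index('.', len(NOMAD_ALLOC_NAME)) + 1
                end = var.index('[')
                if end > start:
                    tags.append('nomad_group:%s' % var[start:end])
            except ValueError:
                pass
    return tags

if __name__ == '__main__':
    # Hypothetical Env list as returned by docker inspect for a Nomad-run task.
    env = ['NOMAD_TASK_NAME=redis', 'NOMAD_JOB_NAME=cache-job',
           'NOMAD_ALLOC_NAME=cache-job.cache[0]', 'PATH=/usr/bin']
    print(nomad_tags_from_env(env))
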
Code example #7
File: ecsutil.py  Project: Scofields/dd-agent
class ECSUtil:
    __metaclass__ = Singleton

    def __init__(self):
        self.docker_util = DockerUtil()
        self.ecs_agent_local = None

        self.ecs_tags = {}
        self._populate_ecs_tags()

    def _get_ecs_address(self):
        """Detect how to connect to the ecs-agent"""
        ecs_config = self.docker_util.inspect_container('ecs-agent')
        ip = ecs_config.get('NetworkSettings', {}).get('IPAddress')
        ports = ecs_config.get('NetworkSettings', {}).get('Ports')
        port = ports.keys()[0].split('/')[0] if ports else None
        if not ip:
            port = ECS_INTROSPECT_DEFAULT_PORT
            if self._is_ecs_agent_local():
                ip = "localhost"
            elif Platform.is_containerized():
                ip = self.docker_util.get_gateway()
            else:
                raise Exception("Unable to determine ecs-agent IP address")

        return ip, port

    def _populate_ecs_tags(self, skip_known=False):
        """
        Populate the cache of ECS tags. Can be called with skip_known=True
        when we only want to pick up new containers quickly (a single task API
        call), for example because we detected that a new task started.
        """
        try:
            ip, port = self._get_ecs_address()
        except Exception as ex:
            log.warning(
                "Failed to connect to ecs-agent, skipping task tagging: %s" %
                ex)
            return

        try:
            tasks = requests.get('http://%s:%s/v1/tasks' % (ip, port)).json()
            for task in tasks.get('Tasks', []):
                for container in task.get('Containers', []):
                    cid = container['DockerId']

                    if skip_known and cid in self.ecs_tags:
                        continue

                    tags = [
                        'task_name:%s' % task['Family'],
                        'task_version:%s' % task['Version']
                    ]
                    self.ecs_tags[container['DockerId']] = tags
        except requests.exceptions.HTTPError as ex:
            log.warning("Unable to collect ECS task names: %s" % ex)

    def _get_container_tags(self, cid):
        """
        This method triggers a fast fill of the tag cache (useful when a new task starts
        and we want the new containers to be cached with a single api call) and returns
        the tags (or an empty list) from the fresh cache.
        """
        self._populate_ecs_tags(skip_known=True)

        if cid in self.ecs_tags:
            return self.ecs_tags[cid]
        else:
            log.debug(
                "Container %s doesn't seem to be an ECS task, skipping." %
                cid[:12])
            self.ecs_tags[cid] = []
        return []

    def _is_ecs_agent_local(self):
        """Return True if we can reach the ecs-agent over localhost, False otherwise.
        This is needed because if the ecs-agent is started with --net=host it won't have an IP address attached.
        """
        if self.ecs_agent_local is not None:
            return self.ecs_agent_local

        self.ecs_agent_local = False
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(5)
        try:
            result = sock.connect_ex(
                ('localhost', ECS_INTROSPECT_DEFAULT_PORT))
        except Exception as e:
            log.debug(
                "Unable to connect to ecs-agent. Exception: {0}".format(e))
        else:
            if result == 0:
                self.ecs_agent_local = True
            else:
                log.debug(
                    "ecs-agent is not available locally, encountered error code: {0}"
                    .format(result))
        sock.close()
        return self.ecs_agent_local

    def extract_container_tags(self, co):
        """
        Queries the ecs-agent to get ECS tags (task and task version) for a container.
        As this is expensive, it is cached in the self.ecs_tags dict.
        The cache invalidation goes through invalidate_ecs_cache, called by the docker_daemon check

        :param co: container dict returned by docker-py
        :return: tags as list<string>, cached
        """
        co_id = co.get('Id', None)

        if co_id is None:
            log.warning("Invalid container object in extract_container_tags")
            return []

        if co_id in self.ecs_tags:
            return self.ecs_tags[co_id]
        else:
            return self._get_container_tags(co_id)

    def invalidate_cache(self, events):
        """
        Allows cache invalidation when containers die
        :param events from self.get_events
        """
        try:
            for ev in events:
                if ev.get('status') == 'die' and ev.get('id') in self.ecs_tags:
                    del self.ecs_tags[ev.get('id')]
        except Exception as e:
            log.warning("Error when invalidating ecs cache: " + str(e))
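
When the ecs-agent runs with --net=host its inspect output has no IPAddress, so this variant falls back to probing the introspection port on localhost. A minimal sketch of that TCP probe, assuming the default port 51678:

import socket

ECS_INTROSPECT_DEFAULT_PORT = 51678  # ecs-agent default introspection port

def is_ecs_agent_local(port=ECS_INTROSPECT_DEFAULT_PORT, timeout=5):
    """Return True if a TCP connection to the introspection port succeeds on localhost."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(timeout)
    try:
        # connect_ex returns 0 on success and an errno otherwise, without raising.
        result = sock.connect_ex(('localhost', port))
    except Exception:
        return False
    finally:
        sock.close()
    return result == 0

if __name__ == '__main__':
    print(is_ecs_agent_local())
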
Code example #8
File: sd_docker_backend.py  Project: jsh2134/dd-agent
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        self.VAR_MAPPING = {
            'host': self._get_host,
            'port': self._get_ports,
            'tags': self._get_additional_tags,
        }
        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host(self, container_inspect):
        """Extract the host IP from a docker inspect object, or the kubelet API."""
        ip_addr = container_inspect.get('NetworkSettings', {}).get('IPAddress')
        if not ip_addr:
            if not is_k8s():
                return
            # kubernetes case
            log.debug("Didn't find the IP address for container %s (%s), using the kubernetes way." %
                      (container_inspect.get('Id', '')[:12], container_inspect.get('Config', {}).get('Image', '')))
            pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
            c_id = container_inspect.get('Id')
            for pod in pod_list:
                pod_ip = pod.get('status', {}).get('podIP')
                if pod_ip is None:
                    continue
                else:
                    c_statuses = pod.get('status', {}).get('containerStatuses', [])
                    for status in c_statuses:
                        # compare the container id with those of containers in the current pod
                        if c_id == status.get('containerID', '').split('//')[-1]:
                            ip_addr = pod_ip

        return ip_addr

    def _get_ports(self, container_inspect):
        """Extract a list of available ports from a docker inspect object. Sort them numerically."""
        c_id = container_inspect.get('Id', '')
        try:
            ports = map(lambda x: x.split('/')[0], container_inspect['NetworkSettings']['Ports'].keys())
        except (IndexError, KeyError, AttributeError):
            log.debug("Didn't find the port for container %s (%s), trying the kubernetes way." %
                      (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
            # first we try to get it from the docker API
            # it works if the image has an EXPOSE instruction
            ports = map(lambda x: x.split('/')[0], container_inspect['Config'].get('ExposedPorts', {}).keys())
            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                co_statuses = self._get_kube_config(c_id, 'status').get('containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = self._get_kube_config(c_id, 'spec').get('containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = map(lambda x: str(x.get('containerPort')), co.get('ports', []))
        ports = sorted(ports, key=lambda x: int(x))
        return ports

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'), 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.iteritems():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(pod_metadata.get('annotations', {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference', {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s' % created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect):
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(container_inspect.get('Id'), 'metadata')
            pod_spec = self._get_kube_config(container_inspect.get('Id'), 'spec')
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API"""
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
        containers = [(
            container.get('Image').split(':')[0].split('/')[-1],
            container.get('Id'), container.get('Labels')
        ) for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                check_configs = self._get_check_configs(cid, image, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config, [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception('Building config for container %s based on image %s using service'
                              ' discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def _get_check_configs(self, c_id, image, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags."""
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(image, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with image %s. '
                      'It will be left unconfigured.' % (c_id[:12], image))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                if trace_config and len(tpl[1]) == 2:
                    source, (init_config, instance) = tpl
                    check_configs.append((source, (check_name, init_config, instance)))
                elif not trace_config:
                    init_config, instance = tpl
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, image_name, trace_config=False):
        """Extract config templates for an image from a K/V store and return them as a list."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning('No supported configuration backend was provided, using auto-config only.')
        else:
            auto_conf = False

        # format: [('image', {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, ('image', {init_tpl}, {instance_tpl}))] with trace_config
        raw_tpls = self.config_store.get_check_tpls(image_name, auto_conf=auto_conf, trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug('No template was found for image %s, leaving it alone.' % image_name)
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = map(lambda x: x.strip('%'), variables)
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception('Failed to decode the JSON template fetched for check {0}. Its configuration'
                              ' by service discovery failed for {1}.'.format(check_name, image_name))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))
            else:
                templates.append((check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a
           dict from template variable names and their values."""
        var_values = {}

        # add default tags to the instance
        if tags:
            tags += instance_tpl.get('tags', [])
            instance_tpl['tags'] = list(set(tags))

        for v in variables:
            # variables can be suffixed with an index in case a list is found
            var_parts = v.split('_')
            if var_parts[0] in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_parts[0]](inspect)
                    if not res:
                        raise ValueError("Invalid value for variable %s." % var_parts[0])
                    # if an index is found in the variable, use it to select a value
                    if len(var_parts) > 1 and isinstance(res, list) and int(var_parts[-1]) < len(res):
                        var_values[v] = res[int(var_parts[-1])]
                    # if no valid index was found but we have a list, return the last element
                    elif isinstance(res, list):
                        var_values[v] = res[-1]
                    else:
                        var_values[v] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s: %s" % (v, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s." % v)

        return instance_tpl, var_values
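
In this variant the 'port' handler returns the full, numerically sorted port list and _fill_tpl picks a single element when the template variable carries an index suffix (e.g. %%port_0%%), defaulting to the last one. A small sketch of those two steps against a hypothetical inspect payload:

def extract_ports(container_inspect):
    """Collect exposed ports from a docker inspect dict and sort them numerically."""
    net_ports = container_inspect.get('NetworkSettings', {}).get('Ports') or {}
    ports = [p.split('/')[0] for p in net_ports]
    if not ports:
        # Fall back to the image's EXPOSE instructions.
        ports = [p.split('/')[0] for p in container_inspect.get('Config', {}).get('ExposedPorts', {})]
    return sorted(ports, key=int)

def pick_port(ports, tpl_var):
    """Pick the port for a template variable such as 'port' or 'port_0'."""
    if not ports:
        return None
    parts = tpl_var.split('_')
    if len(parts) > 1 and parts[-1].isdigit() and int(parts[-1]) < len(ports):
        return ports[int(parts[-1])]
    # No valid index: default to the last (highest) port.
    return ports[-1]

if __name__ == '__main__':
    # Hypothetical inspect payload for a container exposing two ports.
    inspect_data = {'NetworkSettings': {'Ports': {'6379/tcp': None, '8080/tcp': None}}}
    ports = extract_ports(inspect_data)
    print(ports)                       # ['6379', '8080']
    print(pick_port(ports, 'port_0'))  # '6379'
    print(pick_port(ports, 'port'))    # '8080'
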
Code example #9
File: nomadutil.py  Project: kuhafa/dd-agent
class NomadUtil:
    __metaclass__ = Singleton

    def __init__(self):
        self.docker_util = DockerUtil()

        # Tags cache as a dict {co_id: (create_timestamp, [tags])}
        self._container_tags_cache = {}

    def extract_container_tags(self, co):
        """
        Queries docker inspect to get nomad tags in the container's environment vars.
        As this is expensive, it is cached in the self._nomad_tags_cache dict.
        The cache invalidation goes through invalidate_nomad_cache, called by the docker_daemon check

        :param co: container dict returned by docker-py
        :return: tags as list<string>, cached
        """

        co_id = co.get('Id', None)

        if co_id is None:
            log.warning("Invalid container object in extract_container_tags")
            return

        # Cache lookup on Id, verified on Created timestamp
        if co_id in self._container_tags_cache:
            created, tags = self._container_tags_cache[co_id]
            if created == co.get('Created', -1):
                return tags

        tags = []
        try:
            inspect_info = self.docker_util.inspect_container(co_id)
            envvars = inspect_info.get('Config', {}).get('Env', {})
            for var in envvars:
                if var.startswith(NOMAD_TASK_NAME):
                    tags.append('nomad_task:%s' %
                                var[len(NOMAD_TASK_NAME) + 1:])
                elif var.startswith(NOMAD_JOB_NAME):
                    tags.append('nomad_job:%s' % var[len(NOMAD_JOB_NAME) + 1:])
                elif var.startswith(NOMAD_ALLOC_NAME):
                    try:
                        start = var.index('.', len(NOMAD_ALLOC_NAME)) + 1
                        end = var.index('[')
                        if end <= start:
                            raise ValueError(
                                "Error extracting group from %s, check format"
                                % var)
                        tags.append('nomad_group:%s' % var[start:end])
                    except ValueError:
                        pass
                    self._container_tags_cache[co_id] = (co.get('Created'),
                                                         tags)
        except Exception as e:
            log.warning("Error while parsing Nomad tags: %s" % str(e))
        finally:
            return tags

    def invalidate_cache(self, events):
        """
        Allows cache invalidation when containers die
        :param events from self.get_events
        """
        try:
            for ev in events:
                if ev.get('status') == 'die' and ev.get(
                        'id') in self._container_tags_cache:
                    del self._container_tags_cache[ev.get('id')]
        except Exception as e:
            log.warning("Error when invalidating nomad cache: " + str(e))
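
All of these utils invalidate their tag caches the same way: the docker_daemon check forwards docker events and any container that emitted a 'die' event is dropped from the cache. A compact sketch of that step with a made-up cache and event stream:

def invalidate_cache(tags_cache, events):
    """Drop cache entries for containers that emitted a docker 'die' event."""
    for ev in events:
        if ev.get('status') == 'die' and ev.get('id') in tags_cache:
            del tags_cache[ev.get('id')]

if __name__ == '__main__':
    # Hypothetical cache and docker event stream.
    cache = {'abc123': ['nomad_task:redis'], 'def456': ['nomad_task:web']}
    events = [{'status': 'die', 'id': 'abc123'}, {'status': 'start', 'id': 'fff000'}]
    invalidate_cache(cache, events)
    print(cache)  # {'def456': ['nomad_task:web']}
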
Code example #10
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""
    def __init__(self, agentConfig):
        self.docker_client = DockerUtil().client
        if is_k8s():
            self.kubeutil = KubeUtil()

        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        self.VAR_MAPPING = {
            'host': self._get_host,
            'port': self._get_ports,
            'tags': self._get_additional_tags,
        }
        AbstractSDBackend.__init__(self, agentConfig)

    def _get_host(self, container_inspect):
        """Extract the host IP from a docker inspect object, or the kubelet API."""
        ip_addr = container_inspect.get('NetworkSettings', {}).get('IPAddress')
        if not ip_addr:
            if not is_k8s():
                return
            # kubernetes case
            log.debug(
                "Didn't find the IP address for container %s (%s), using the kubernetes way."
                % (container_inspect.get('Id', '')[:12],
                   container_inspect.get('Config', {}).get('Image', '')))
            pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
            c_id = container_inspect.get('Id')
            for pod in pod_list:
                pod_ip = pod.get('status', {}).get('podIP')
                if pod_ip is None:
                    continue
                else:
                    c_statuses = pod.get('status',
                                         {}).get('containerStatuses', [])
                    for status in c_statuses:
                        # compare the container id with those of containers in the current pod
                        if c_id == status.get('containerID',
                                              '').split('//')[-1]:
                            ip_addr = pod_ip

        return ip_addr

    def _get_ports(self, container_inspect):
        """Extract a list of available ports from a docker inspect object. Sort them numerically."""
        c_id = container_inspect.get('Id', '')
        try:
            ports = map(lambda x: x.split('/')[0],
                        container_inspect['NetworkSettings']['Ports'].keys())
        except (IndexError, KeyError, AttributeError):
            log.debug(
                "Didn't find the port for container %s (%s), trying the kubernetes way."
                % (c_id[:12], container_inspect.get('Config', {}).get(
                    'Image', '')))
            # first we try to get it from the docker API
            # it works if the image has an EXPOSE instruction
            ports = map(
                lambda x: x.split('/')[0],
                container_inspect['Config'].get('ExposedPorts', {}).keys())
            # if it failed, try with the kubernetes API
            if not ports and is_k8s():
                co_statuses = self._get_kube_config(c_id, 'status').get(
                    'containerStatuses', [])
                c_name = None
                for co in co_statuses:
                    if co.get('containerID', '').split('//')[-1] == c_id:
                        c_name = co.get('name')
                        break
                containers = self._get_kube_config(c_id, 'spec').get(
                    'containers', [])
                for co in containers:
                    if co.get('name') == c_name:
                        ports = map(lambda x: str(x.get('containerPort')),
                                    co.get('ports', []))
        ports = sorted(ports, key=lambda x: int(x))
        return ports

    def get_tags(self, c_inspect):
        """Extract useful tags from docker or platform APIs. These are collected by default."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(c_inspect.get('Id'),
                                                 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." %
                            c_inspect.get('Id', '')[:12])
                return []
            # get labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.iteritems():
                tags.append('%s:%s' % (label, value))

            # get replication controller
            created_by = json.loads(
                pod_metadata.get('annotations',
                                 {}).get('kubernetes.io/created-by', '{}'))
            if created_by.get('reference',
                              {}).get('kind') == 'ReplicationController':
                tags.append('kube_replication_controller:%s' %
                            created_by.get('reference', {}).get('name'))

            # get kubernetes namespace
            tags.append('kube_namespace:%s' % pod_metadata.get('namespace'))

        return tags

    def _get_additional_tags(self, container_inspect):
        """Collect additional tags (node name, pod name) used to resolve
        the 'tags' template variable."""
        tags = []
        if is_k8s():
            pod_metadata = self._get_kube_config(container_inspect.get('Id'),
                                                 'metadata')
            pod_spec = self._get_kube_config(container_inspect.get('Id'),
                                             'spec')
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or spec for container %s."
                            " Kubernetes tags may be missing." %
                            container_inspect.get('Id', '')[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def _get_kube_config(self, c_id, key):
        """Get a part of a pod config from the kubernetes API"""
        pods = self.kubeutil.retrieve_pods_list().get('items', [])
        for pod in pods:
            c_statuses = pod.get('status', {}).get('containerStatuses', [])
            for status in c_statuses:
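                # 'containerID' is reported as '<runtime>://<container id>',
                # so compare only the id part after the '//'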
                if c_id == status.get('containerID', '').split('//')[-1]:
                    return pod.get(key, {})

    def get_configs(self):
        """Get the config for all docker containers running on the host."""
        configs = {}
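        # (image name without tag or repository prefix, container id, labels)
        # for every container currently running on the host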
        containers = [(container.get('Image').split(':')[0].split('/')[-1],
                       container.get('Id'), container.get('Labels'))
                      for container in self.docker_client.containers()]

        # used by the configcheck agent command to trace where check configs come from
        trace_config = self.agentConfig.get(TRACE_CONFIG, False)

        for image, cid, labels in containers:
            try:
                check_configs = self._get_check_configs(
                    cid, image, trace_config=trace_config) or []
                for conf in check_configs:
                    if trace_config and conf is not None:
                        source, conf = conf

                    check_name, init_config, instance = conf
                    # build instances list if needed
                    if configs.get(check_name) is None:
                        if trace_config:
                            configs[check_name] = (source, (init_config,
                                                            [instance]))
                        else:
                            configs[check_name] = (init_config, [instance])
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {0}. ' \
                            'Keeping the first one found.'
                        if trace_config:
                            if configs[check_name][1][0] != init_config:
                                log.warning(
                                    conflict_init_msg.format(check_name))
                            configs[check_name][1][1].append(instance)
                        else:
                            if configs[check_name][0] != init_config:
                                log.warning(
                                    conflict_init_msg.format(check_name))
                            configs[check_name][1].append(instance)
            except Exception:
                log.exception(
                    'Building config for container %s based on image %s using service'
                    ' discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def _get_check_configs(self, c_id, image, trace_config=False):
        """Retrieve configuration templates and fill them with data pulled from docker and tags."""
        inspect = self.docker_client.inspect_container(c_id)
        config_templates = self._get_config_templates(
            image, trace_config=trace_config)
        if not config_templates:
            log.debug('No config template for container %s with image %s. '
                      'It will be left unconfigured.' % (c_id[:12], image))
            return None

        check_configs = []
        tags = self.get_tags(inspect)
        for config_tpl in config_templates:
            if trace_config:
                source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(inspect, instance_tpl,
                                                      variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl
                                        or {}, var_values)
            if tpl and len(tpl) == 2:
                if trace_config and len(tpl[1]) == 2:
                    source, (init_config, instance) = tpl
                    check_configs.append(
                        (source, (check_name, init_config, instance)))
                elif not trace_config:
                    init_config, instance = tpl
                    check_configs.append((check_name, init_config, instance))

        return check_configs

    def _get_config_templates(self, image_name, trace_config=False):
        """Extract config templates for an image from a K/V store and returns it as a dict object."""
        config_backend = self.agentConfig.get('sd_config_backend')
        templates = []
        if config_backend is None:
            auto_conf = True
            log.warning(
                'No supported configuration backend was provided, using auto-config only.'
            )
        else:
            auto_conf = False

        # format: [(check_name, {init_tpl}, {instance_tpl})] without trace_config
        # or      [(source, (check_name, {init_tpl}, {instance_tpl}))] with trace_config
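        # e.g. (illustrative only) a tuple for a redis container could look like:
        #   ('redisdb', {}, {'host': '%%host%%', 'port': '%%port%%'})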
        raw_tpls = self.config_store.get_check_tpls(image_name,
                                                    auto_conf=auto_conf,
                                                    trace_config=trace_config)
        for tpl in raw_tpls:
            if trace_config and tpl is not None:
                # each template can come from either auto configuration or user-supplied templates
                source, tpl = tpl
            if tpl is not None and len(tpl) == 3:
                check_name, init_config_tpl, instance_tpl = tpl
            else:
                log.debug(
                    'No template was found for image %s, leaving it alone.' %
                    image_name)
                return None
            try:
                # build a list of all variables to replace in the template
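                # placeholders look like %%host%% or %%port_0%%; strip('%') below
                # keeps only the variable name (and optional index)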
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = map(lambda x: x.strip('%'), variables)
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception(
                    'Failed to decode the JSON template fetched for check {0}. Its configuration'
                    ' by service discovery failed for {1}.'.format(
                        check_name, image_name))
                return None

            if trace_config:
                templates.append((source, (check_name, init_config_tpl,
                                           instance_tpl, variables)))
            else:
                templates.append(
                    (check_name, init_config_tpl, instance_tpl, variables))

        return templates

    def _fill_tpl(self, inspect, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a """
        """dict from template variable names and their values."""
        var_values = {}

        # add default tags to the instance
        if tags:
            tags += instance_tpl.get('tags', [])
            instance_tpl['tags'] = list(set(tags))

        for v in variables:
            # variables can be suffixed with an index in case a list is found
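            # e.g. 'port_1' resolves to the second entry of the sorted port list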
            var_parts = v.split('_')
            if var_parts[0] in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_parts[0]](inspect)
                    if not res:
                        raise ValueError("Invalid value for variable %s." %
                                         var_parts[0])
                    # if an index is found in the variable, use it to select a value
                    if len(var_parts) > 1 and isinstance(
                            res, list) and int(var_parts[-1]) < len(res):
                        var_values[v] = res[int(var_parts[-1])]
                    # if no valid index was found but we have a list, return the last element
                    elif isinstance(res, list):
                        var_values[v] = res[-1]
                    else:
                        var_values[v] = res
                except Exception as ex:
                    log.error(
                        "Could not find a value for the template variable %s: %s"
                        % (v, str(ex)))
            else:
                log.error(
                    "No method was found to interpolate template variable %s."
                    % v)

        return instance_tpl, var_values
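
# Minimal usage sketch (assumes a populated agentConfig dict, as passed to the
# backend constructor above):
#
#     sd_backend = SDDockerBackend(agentConfig)
#     configs = sd_backend.get_configs()
#     # -> {check_name: (init_config, [instance, ...]), ...}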