def is_excluded(self, cid, pod_uid=None):
    """
    Tell whether metrics for a container should be dropped.

    The decision is stored in ``self.cache`` keyed by container id, so the
    container filter (``c_is_excluded``) is queried at most once per
    container; this avoids paying the python-go switching cost for every
    prometheus metric.

    Decision order:
      * an empty container id is always excluded (and not cached);
      * a cached decision is returned as is;
      * containers of a static pod (``pod_uid`` listed in
        ``self.static_pod_uids``) are never excluded;
      * ids unknown to ``self.containers`` are excluded;
      * containers lacking a "name" or "image" entry are excluded;
      * otherwise the container filter decides from name, image and
        namespace.

    :param cid: container id
    :param pod_uid: pod UID for static pod detection
    :return: bool
    """
    if not cid:
        return True

    if cid in self.cache:
        return self.cache[cid]

    if pod_uid and pod_uid in self.static_pod_uids:
        verdict = False
    elif cid not in self.containers:
        # Filter out metrics not coming from a container (system slices)
        verdict = True
    else:
        container = self.containers[cid]
        if "name" not in container or "image" not in container:
            # Filter out invalid containers
            verdict = True
        else:
            name = container.get("name")
            image = container.get("image")
            # Namespace falls back to an empty string when it is unknown
            namespace = self.container_id_to_namespace.get(cid, "")
            verdict = c_is_excluded(name, image, namespace)

    self.cache[cid] = verdict
    return verdict
def is_namespace_excluded(self, namespace):
    """
    Queries the agent container filter interface to check whether a
    Kubernetes namespace should be excluded.

    The result is cached in ``self.cache_namespace_exclusion`` and reused
    on later calls for the same namespace, to avoid the python-go
    switching cost.

    :param namespace: namespace
    :return: bool; always False for an empty namespace
    """
    if not namespace:
        return False

    # Serve repeated lookups from the cache instead of querying the filter
    # again; previously the cache was filled but never read.
    if namespace in self.cache_namespace_exclusion:
        return self.cache_namespace_exclusion[namespace]

    # Send an empty container name and image because we are interested in
    # applying only the namespace exclusion rules.
    excluded = c_is_excluded('', '', namespace)
    self.cache_namespace_exclusion[namespace] = excluded
    return excluded
def check(self, instance):
    """
    Run one collection cycle against the Fargate metadata and stats
    endpoints.

    Steps:
      1. Fetch and decode the metadata endpoint; it must contain the
         'Cluster' and 'Containers' keys.
      2. For every container that is not excluded by the container filter,
         build its tag list (tagger tags + backward-compatible aliases +
         instance tags) and submit 'ecs.fargate.cpu.limit' when a positive
         CPU limit is set.
      3. Fetch and decode the stats endpoint and submit the performance
         metrics of every non-excluded container.

    Any failure submits the 'fargate_check' service check with a non-OK
    status and aborts the run; the OK status is only submitted when both
    endpoints were fetched and decoded successfully.

    :param instance: check instance configuration; only 'tags' is read here
    :return: None
    """
    metadata_endpoint = API_ENDPOINT + METADATA_ROUTE
    stats_endpoint = API_ENDPOINT + STATS_ROUTE
    custom_tags = instance.get('tags', [])

    # --- Task metadata ---------------------------------------------------
    try:
        request = self.http.get(metadata_endpoint)
    except requests.exceptions.Timeout:
        msg = 'Fargate {} endpoint timed out after {} seconds'.format(
            metadata_endpoint, self.http.options['timeout'])
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.exception(msg)
        return
    except requests.exceptions.RequestException:
        msg = 'Error fetching Fargate {} endpoint'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.exception(msg)
        return

    if request.status_code != 200:
        msg = 'Fargate {} endpoint responded with {} HTTP code'.format(
            metadata_endpoint, request.status_code)
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    try:
        metadata = request.json()
    except ValueError:
        msg = 'Cannot decode Fargate {} endpoint response'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return

    if not all(k in metadata for k in ['Cluster', 'Containers']):
        msg = 'Missing critical metadata in {} endpoint response'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    # --- Per-container tags and CPU limit --------------------------------
    excluded_cids = set()
    container_tags = {}
    for container in metadata['Containers']:
        c_id = container['DockerId']

        # Check if container is excluded
        if c_is_excluded(container.get("Name", ""), container.get("Image", "")):
            excluded_cids.add(c_id)
            continue

        tagger_tags = get_tags('container_id://%s' % c_id, True) or []

        # Compatibility with previous versions of the check
        compat_tags = []
        for tag in tagger_tags:
            if tag.startswith(("task_family:", "task_version:")):
                compat_tags.append("ecs_" + tag)
            elif tag.startswith("cluster_name:"):
                compat_tags.append(tag.replace("cluster_name:", "ecs_cluster:"))
            elif tag.startswith("container_name:"):
                compat_tags.append(tag.replace("container_name:", "docker_name:"))
        container_tags[c_id] = tagger_tags + compat_tags + custom_tags

        if container.get('Limits', {}).get('CPU', 0) > 0:
            self.gauge('ecs.fargate.cpu.limit', container['Limits']['CPU'], container_tags[c_id])

    # --- Container stats -------------------------------------------------
    try:
        request = self.http.get(stats_endpoint)
    except requests.exceptions.Timeout:
        msg = 'Fargate {} endpoint timed out after {} seconds'.format(
            stats_endpoint, self.http.options['timeout'])
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return
    except requests.exceptions.RequestException:
        msg = 'Error fetching Fargate {} endpoint'.format(stats_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return

    if request.status_code != 200:
        msg = 'Fargate {} endpoint responded with {} HTTP code'.format(
            stats_endpoint, request.status_code)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    try:
        stats = request.json()
    except ValueError:
        msg = 'Cannot decode Fargate {} endpoint response'.format(stats_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        # Stop here: falling through would submit an OK status right after
        # the WARNING above, for a run that collected no stats at all.
        return

    for container_id, container_stats in iteritems(stats):
        if container_id not in excluded_cids:
            self.submit_perf_metrics(instance, container_tags, container_id, container_stats)

    self.service_check('fargate_check', AgentCheck.OK, tags=custom_tags)