Beispiel #1
0
    def _is_container_excluded(self, container):
        """Check if a container is excluded according to the filter rules.

        Requires _filter_containers to run first.
        """
        container_name = DockerUtil.container_name_extractor(container)[0]
        return container_name in self._filtered_containers
Beispiel #2
0
    def _format_events(self, aggregated_events, containers_by_id):
        events = []
        for image_name, event_group in aggregated_events.iteritems():
            container_tags = set()
            low_prio_events = []
            normal_prio_events = []

            for event in event_group:
                container_name = event['id'][:11]

                if event['id'] in containers_by_id:
                    cont = containers_by_id[event['id']]
                    container_name = DockerUtil.container_name_extractor(cont)[0]
                    container_tags.update(self._get_tags(cont, PERFORMANCE))
                    container_tags.add('container_name:%s' % container_name)

                # health checks generate tons of these so we treat them separately and lower their priority
                if event['status'].startswith('exec_create:') or event['status'].startswith('exec_start:'):
                    low_prio_events.append((event, container_name))
                else:
                    normal_prio_events.append((event, container_name))

            exec_event = self._create_dd_event(low_prio_events, image_name, container_tags, priority='Low')
            if exec_event:
                events.append(exec_event)

            normal_event = self._create_dd_event(normal_prio_events, image_name, container_tags, priority='Normal')
            if normal_event:
                events.append(normal_event)

        return events
    def _report_performance_metrics(self, containers_by_id):

        containers_without_proc_root = []
        for container in containers_by_id.itervalues():
            if self._is_container_excluded(container) or not self._is_container_running(container):
                continue

            tags = self._get_tags(container, PERFORMANCE)

            try:
                self._report_cgroup_metrics(container, tags)
                if "_proc_root" not in container:
                    containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0])
                    continue
                self._report_net_metrics(container, tags)
            except BogusPIDException as e:
                self.log.warning('Unable to report cgroup metrics: %s', e)

        if containers_without_proc_root:
            message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
                ", ".join(containers_without_proc_root))
            if not Platform.is_k8s():
                self.warning(message)
            else:
                # On kubernetes, this is kind of expected. Network metrics will be collected by the kubernetes integration anyway
                self.log.debug(message)
Beispiel #4
0
    def _get_and_count_containers(self, custom_cgroups=False):
        """List all the containers from the API, filter and count them."""

        # Querying the size of containers is slow, we don't do it at each run
        must_query_size = self.collect_container_size and self._latest_size_query == 0
        self._latest_size_query = (self._latest_size_query + 1) % SIZE_REFRESH_RATE

        running_containers_count = Counter()
        all_containers_count = Counter()

        try:
            containers = self.docker_client.containers(all=True, size=must_query_size)
        except Exception as e:
            message = "Unable to list Docker containers: {0}".format(e)
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                               message=message)
            raise Exception(message)

        else:
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK)

        # Filter containers according to the exclude/include rules
        self._filter_containers(containers)

        containers_by_id = {}

        for container in containers:
            container_name = DockerUtil.container_name_extractor(container)[0]

            container_status_tags = self._get_tags(container, CONTAINER)

            all_containers_count[tuple(sorted(container_status_tags))] += 1
            if self._is_container_running(container):
                running_containers_count[tuple(sorted(container_status_tags))] += 1

            # Check if the container is included/excluded via its tags
            if self._is_container_excluded(container):
                self.log.debug("Container {0} is excluded".format(container_name))
                continue

            containers_by_id[container['Id']] = container

            # grab pid via API if custom cgroups - otherwise we won't find process when
            # crawling for pids.
            if custom_cgroups:
                try:
                    inspect_dict = self.docker_client.inspect_container(container_name)
                    container['_pid'] = inspect_dict['State']['Pid']
                except Exception as e:
                    self.log.debug("Unable to inspect Docker container: %s", e)


        for tags, count in running_containers_count.iteritems():
            self.gauge("docker.containers.running", count, tags=list(tags))

        for tags, count in all_containers_count.iteritems():
            stopped_count = count - running_containers_count[tags]
            self.gauge("docker.containers.stopped", stopped_count, tags=list(tags))

        return containers_by_id
Beispiel #5
0
    def _format_events(self, aggregated_events, containers_by_id):
        events = []
        for image_name, event_group in aggregated_events.iteritems():
            container_tags = set()
            low_prio_events = []
            normal_prio_events = []

            for event in event_group:
                container_name = event['id'][:11]

                if event['id'] in containers_by_id:
                    cont = containers_by_id[event['id']]
                    container_name = DockerUtil.container_name_extractor(cont)[0]
                    container_tags.update(self._get_tags(cont, PERFORMANCE))
                    container_tags.add('container_name:%s' % container_name)

                # health checks generate tons of these so we treat them separately and lower their priority
                if event['status'].startswith('exec_create:') or event['status'].startswith('exec_start:'):
                    low_prio_events.append((event, container_name))
                else:
                    normal_prio_events.append((event, container_name))

            exec_event = self._create_dd_event(low_prio_events, image_name, container_tags, priority='Low')
            if exec_event:
                events.append(exec_event)

            normal_event = self._create_dd_event(normal_prio_events, image_name, container_tags, priority='Normal')
            if normal_event:
                events.append(normal_event)

        return events
Beispiel #6
0
    def _report_exit_codes(self, events, containers_by_id):
        for event in events:
            container_tags = set()
            container = containers_by_id.get(event.get('id'))
            # Skip events related to filtered containers
            if container is not None and self._is_container_excluded(
                    container):
                continue

            # Report the exit code in case of a DIE event
            if container is not None and event['status'] == 'die':
                container_name = DockerUtil.container_name_extractor(
                    container)[0]
                container_tags.update(self._get_tags(container, CONTAINER))
                container_tags.add('container_name:%s' % container_name)
                try:
                    exit_code = int(event['Actor']['Attributes']['exitCode'])
                    message = 'Container %s exited with %s' % (container_name,
                                                               exit_code)
                    status = AgentCheck.OK if exit_code == 0 else AgentCheck.CRITICAL
                    self.service_check(EXIT_SERVICE_CHECK_NAME,
                                       status,
                                       tags=list(container_tags),
                                       message=message)
                except KeyError:
                    self.log.warning(
                        'Unable to collect the exit code for container %s' %
                        container_name)
Beispiel #7
0
    def _is_container_excluded(self, container):
        """Check if a container is excluded according to the filter rules.

        Requires _filter_containers to run first.
        """
        container_name = DockerUtil.container_name_extractor(container)[0]
        return container_name in self._filtered_containers
Beispiel #8
0
    def _report_performance_metrics(self, containers_by_id):

        containers_without_proc_root = []
        for container_id, container in containers_by_id.iteritems():
            if self._is_container_excluded(container) or not self._is_container_running(container):
                continue

            tags = self._get_tags(container, PERFORMANCE)

            try:
                self._report_cgroup_metrics(container, tags)
                if "_proc_root" not in container:
                    containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0])
                    continue
                self._report_net_metrics(container, tags)
            except BogusPIDException as e:
                self.log.warning('Unable to report cgroup metrics for container %s: %s', container_id[:12], e)

        if containers_without_proc_root:
            message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
                ", ".join(containers_without_proc_root))
            if not Platform.is_k8s():
                self.warning(message)
            else:
                # On kubernetes, this is kind of expected. Network metrics will be collected by the kubernetes integration anyway
                self.log.debug(message)
Beispiel #9
0
    def _get_and_count_containers(self, custom_cgroups=False):
        """List all the containers from the API, filter and count them."""

        # Querying the size of containers is slow, we don't do it at each run
        must_query_size = self.collect_container_size and self._latest_size_query == 0
        self._latest_size_query = (self._latest_size_query + 1) % SIZE_REFRESH_RATE

        running_containers_count = Counter()
        all_containers_count = Counter()

        try:
            containers = self.docker_client.containers(all=True, size=must_query_size)
        except Exception as e:
            message = "Unable to list Docker containers: {0}".format(e)
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                               message=message)
            raise Exception(message)

        else:
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK)

        # Filter containers according to the exclude/include rules
        self._filter_containers(containers)

        containers_by_id = {}

        for container in containers:
            container_name = DockerUtil.container_name_extractor(container)[0]

            container_status_tags = self._get_tags(container, CONTAINER)

            all_containers_count[tuple(sorted(container_status_tags))] += 1
            if self._is_container_running(container):
                running_containers_count[tuple(sorted(container_status_tags))] += 1

            # Check if the container is included/excluded via its tags
            if self._is_container_excluded(container):
                self.log.debug("Container {0} is excluded".format(container_name))
                continue

            containers_by_id[container['Id']] = container

            # grab pid via API if custom cgroups - otherwise we won't find process when
            # crawling for pids.
            if custom_cgroups:
                try:
                    inspect_dict = self.docker_client.inspect_container(container_name)
                    container['_pid'] = inspect_dict['State']['Pid']
                except Exception as e:
                    self.log.debug("Unable to inspect Docker container: %s", e)


        for tags, count in running_containers_count.iteritems():
            self.gauge("docker.containers.running", count, tags=list(tags))

        for tags, count in all_containers_count.iteritems():
            stopped_count = count - running_containers_count[tags]
            self.gauge("docker.containers.stopped", stopped_count, tags=list(tags))

        return containers_by_id
Beispiel #10
0
    def _format_events(self, aggregated_events, containers_by_id):
        events = []
        for image_name, event_group in aggregated_events.iteritems():
            container_tags = set()
            filtered_events_count = 0
            normal_prio_events = []

            for event in event_group:
                # Only keep events that are not configured to be filtered out
                if event['status'].startswith(self.filtered_event_types):
                    filtered_events_count += 1
                    continue
                container_name = event['id'][:11]

                if event['id'] in containers_by_id:
                    cont = containers_by_id[event['id']]
                    container_name = DockerUtil.container_name_extractor(cont)[0]
                    container_tags.update(self._get_tags(cont, PERFORMANCE))
                    container_tags.add('container_name:%s' % container_name)
                    # Add additionnal docker event attributes as tag
                    for attr in self.event_attributes_as_tags:
                        if attr in event['Actor']['Attributes'] and attr not in EXCLUDED_ATTRIBUTES:
                            container_tags.add('%s:%s' % (attr, event['Actor']['Attributes'][attr]))

                normal_prio_events.append((event, container_name))
            if filtered_events_count:
                self.log.debug('%d events were filtered out because of ignored event type' % filtered_events_count)

            normal_event = self._create_dd_event(normal_prio_events, image_name, container_tags, priority='Normal')
            if normal_event:
                events.append(normal_event)

        return events
Beispiel #11
0
 def test_container_name_extraction(self):
     containers = [
         ({"Id": "deadbeef"}, ["deadbeef"]),
         ({"Names": ["/redis"], "Id": "deadbeef"}, ["redis"]),
         ({"Names": ["/mongo", "/redis/mongo"], "Id": "deadbeef"}, ["mongo"]),
         ({"Names": ["/redis/mongo", "/mongo"], "Id": "deadbeef"}, ["mongo"]),
     ]
     for co in containers:
         self.assertEqual(DockerUtil.container_name_extractor(co[0]), co[1])
Beispiel #12
0
 def test_container_name_extraction(self):
     containers = [
         ({'Id': 'deadbeef'}, ['deadbeef']),
         ({'Names': ['/redis'], 'Id': 'deadbeef'}, ['redis']),
         ({'Names': ['/mongo', '/redis/mongo'], 'Id': 'deadbeef'}, ['mongo']),
         ({'Names': ['/redis/mongo', '/mongo'], 'Id': 'deadbeef'}, ['mongo']),
     ]
     for co in containers:
         self.assertEqual(DockerUtil.container_name_extractor(co[0]), co[1])
 def test_container_name_extraction(self):
     containers = [
         ({'Id': 'deadbeef'}, ['deadbeef']),
         ({'Names': ['/redis'], 'Id': 'deadbeef'}, ['redis']),
         ({'Names': ['/mongo', '/redis/mongo'], 'Id': 'deadbeef'}, ['mongo']),
         ({'Names': ['/redis/mongo', '/mongo'], 'Id': 'deadbeef'}, ['mongo']),
     ]
     for co in containers:
         self.assertEqual(DockerUtil.container_name_extractor(co[0]), co[1])
Beispiel #14
0
def print_containers():
    containers = DockerUtil().client.containers()
    print("\nContainers info:\n")
    print("Number of containers found: %s" % len(containers))
    for co in containers:
        c_id = 'ID: %s' % co.get('Id')[:12]
        c_image = 'image: %s' % co.get('Image')
        c_name = 'name: %s' % DockerUtil.container_name_extractor(co)[0]
        print("\t- %s %s %s" % (c_id, c_image, c_name))
    print('\n')
Beispiel #15
0
def print_containers():
    containers = DockerUtil().client.containers()
    print("\nContainers info:\n")
    print("Number of containers found: %s" % len(containers))
    for co in containers:
        c_id = 'ID: %s' % co.get('Id')[:12]
        c_image = 'image: %s' % co.get('Image')
        c_name = 'name: %s' % DockerUtil.container_name_extractor(co)[0]
        print("\t- %s %s %s" % (c_id, c_image, c_name))
    print('\n')
Beispiel #16
0
    def _filter_containers(self, containers):
        if not self._filtering_enabled:
            return

        self._filtered_containers = set()
        for container in containers:
            container_tags = self._get_tags(container, FILTERED)
            if self._are_tags_filtered(container_tags):
                container_name = DockerUtil.container_name_extractor(container)[0]
                self._filtered_containers.add(container_name)
                self.log.debug("Container {0} is filtered".format(container_name))
Beispiel #17
0
    def _filter_containers(self, containers):
        if not self._filtering_enabled:
            return

        self._filtered_containers = set()
        for container in containers:
            container_tags = self._get_tags(container, FILTERED)
            if self._are_tags_filtered(container_tags):
                container_name = DockerUtil.container_name_extractor(container)[0]
                self._filtered_containers.add(container_name)
                self.log.debug("Container {0} is filtered".format(container_name))
Beispiel #18
0
    def _filter_containers(self, containers):
        if not self.docker_util.filtering_enabled:
            return

        self._filtered_containers = set()
        for container in containers:
            container_tags = self._get_tags(container, FILTERED)
            # exclude/include patterns are stored in docker_util to share them with other container-related checks
            if self.docker_util.are_tags_filtered(container_tags):
                container_name = DockerUtil.container_name_extractor(container)[0]
                self._filtered_containers.add(container_name)
                self.log.debug("Container {0} is filtered".format(container_name))
Beispiel #19
0
    def _filter_containers(self, containers):
        if not self.docker_util.filtering_enabled:
            return

        self._filtered_containers = set()
        for container in containers:
            container_tags = self._get_tags(container, FILTERED)
            # exclude/include patterns are stored in docker_util to share them with other container-related checks
            if self.docker_util.are_tags_filtered(container_tags):
                container_name = DockerUtil.container_name_extractor(container)[0]
                self._filtered_containers.add(container_name)
                self.log.debug("Container {0} is filtered".format(container_name))
Beispiel #20
0
    def _format_events(self, aggregated_events, containers_by_id):
        events = []
        for image_name, event_group in aggregated_events.iteritems():
            max_timestamp = 0
            status = defaultdict(int)
            status_change = []
            container_tags = set()
            for event in event_group:
                max_timestamp = max(max_timestamp, int(event['time']))
                status[event['status']] += 1
                container_name = event['id'][:11]
                if event['id'] in containers_by_id:
                    cont = containers_by_id[event['id']]
                    container_name = DockerUtil.container_name_extractor(cont)[0]
                    container_tags.update(self._get_tags(cont, PERFORMANCE))
                    container_tags.add('container_name:%s' % container_name)

                status_change.append([container_name, event['status']])

            status_text = ", ".join(["%d %s" % (count, st) for st, count in status.iteritems()])
            msg_title = "%s %s on %s" % (image_name, status_text, self.hostname)
            msg_body = (
                "%%%\n"
                "{image_name} {status} on {hostname}\n"
                "```\n{status_changes}\n```\n"
                "%%%"
            ).format(
                image_name=image_name,
                status=status_text,
                hostname=self.hostname,
                status_changes="\n".join(
                    ["%s \t%s" % (change[1].upper(), change[0]) for change in status_change])
            )

            if any(error in status_text for error in ERROR_ALERT_TYPE):
                alert_type = "error"
            else:
                alert_type = None

            events.append({
                'timestamp': max_timestamp,
                'host': self.hostname,
                'event_type': EVENT_TYPE,
                'msg_title': msg_title,
                'msg_text': msg_body,
                'source_type_name': EVENT_TYPE,
                'event_object': 'docker:%s' % image_name,
                'tags': list(container_tags),
                'alert_type': alert_type
            })

        return events
Beispiel #21
0
    def _report_exit_codes(self, events, containers_by_id):
        for event in events:
            container_tags = set()
            container = containers_by_id.get(event.get('id'))
            # Skip events related to filtered containers
            if container is not None and self._is_container_excluded(container):
                continue

            # Report the exit code in case of a DIE event
            if container is not None and event['status'] == 'die':
                container_name = DockerUtil.container_name_extractor(container)[0]
                container_tags.update(self._get_tags(container, CONTAINER))
                container_tags.add('container_name:%s' % container_name)
                try:
                    exit_code = int(event['Actor']['Attributes']['exitCode'])
                    message = 'Container %s exited with %s' % (container_name, exit_code)
                    status = AgentCheck.OK if exit_code == 0 else AgentCheck.CRITICAL
                    self.service_check(EXIT_SERVICE_CHECK_NAME, status, tags=list(container_tags), message=message)
                except KeyError:
                    self.log.warning('Unable to collect the exit code for container %s' % container_name)
Beispiel #22
0
    def _report_performance_metrics(self, containers_by_id):

        containers_without_proc_root = []
        for container in containers_by_id.itervalues():
            if self._is_container_excluded(
                    container) or not self._is_container_running(container):
                continue

            tags = self._get_tags(container, PERFORMANCE)
            self._report_cgroup_metrics(container, tags)
            if "_proc_root" not in container:
                containers_without_proc_root.append(
                    DockerUtil.container_name_extractor(container)[0])
                continue
            self._report_net_metrics(container, tags)

        if containers_without_proc_root:
            message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
                ", ".join(containers_without_proc_root))
            if not self.is_k8s():
                self.warning(message)
            else:
                self.log.debug(message)
Beispiel #23
0
    def _is_container_excluded(self, container):

        container_name = DockerUtil.container_name_extractor(container)[0]
        return container_name in self._filtered_containers
Beispiel #24
0
        except Exception, e:
            message = "Unable to list Docker containers: {0}".format(e)
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                               message=message)
            raise Exception(message)

        else:
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK)

        # Filter containers according to the exclude/include rules
        self._filter_containers(containers)

        containers_by_id = {}

        for container in containers:
            container_name = DockerUtil.container_name_extractor(container)[0]

            container_status_tags = self._get_tags(container, CONTAINER)

            all_containers_count[tuple(sorted(container_status_tags))] += 1
            if self._is_container_running(container):
                running_containers_count[tuple(sorted(container_status_tags))] += 1

            # Check if the container is included/excluded via its tags
            if self._is_container_excluded(container):
                self.log.debug("Container {0} is excluded".format(container_name))
                continue

            containers_by_id[container['Id']] = container

        for tags, count in running_containers_count.iteritems():
Beispiel #25
0
            message = "Unable to list Docker containers: {0}".format(e)
            self.service_check(SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               message=message)
            raise Exception(message)

        else:
            self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK)

        # Filter containers according to the exclude/include rules
        self._filter_containers(containers)

        containers_by_id = {}

        for container in containers:
            container_name = DockerUtil.container_name_extractor(container)[0]

            container_status_tags = self._get_tags(container, CONTAINER)

            all_containers_count[tuple(sorted(container_status_tags))] += 1
            if self._is_container_running(container):
                running_containers_count[tuple(
                    sorted(container_status_tags))] += 1

            # Check if the container is included/excluded via its tags
            if self._is_container_excluded(container):
                self.log.debug(
                    "Container {0} is excluded".format(container_name))
                continue

            containers_by_id[container['Id']] = container