Esempio n. 1
0
    def _update_node_state(self, host_conf: DockerHostConfig, node_stats: NodeStats, get_usage_stats: bool):
        node_stats.labels = host_conf.labels
        try:
            my_engine = DockerClient(host_conf)
        except ZoeException as e:
            log.error(str(e))
            node_stats.status = 'offline'
            log.info('Node {} is offline'.format(host_conf.name))
            return
        else:
            node_stats.status = 'online'

        try:
            container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
            info = my_engine.info()
        except ZoeException:
            return

        node_stats.container_count = len(container_list)
        node_stats.cores_total = info['NCPU']
        node_stats.memory_total = info['MemTotal']
        if info['Labels'] is not None:
            node_stats.labels += set(info['Labels'])

        node_stats.memory_reserved = sum([cont['memory_soft_limit'] for cont in container_list if cont['memory_soft_limit'] != node_stats.memory_total])
        node_stats.cores_reserved = sum([cont['cpu_quota'] / cont['cpu_period'] for cont in container_list if cont['cpu_period'] != 0])

        stats = {}
        for cont in container_list:
            stats[cont['id']] = {}
            stats[cont['id']]['core_limit'] = cont['cpu_quota'] / cont['cpu_period']
            stats[cont['id']]['mem_limit'] = cont['memory_soft_limit']
        node_stats.service_stats = stats

        if get_usage_stats:
            if get_conf().kairosdb_enable:
                kdb = KairosDBInMetrics()
                for cont in container_list:
                    stats[cont['id']].update(kdb.get_service_usage(cont['name']))

                node_stats.memory_in_use = sum([stat['mem_usage'] for stat in stats.values()])
                node_stats.cores_in_use = sum([stat['cpu_usage'] for stat in stats.values()])
            else:
                for cont in container_list:
                    try:
                        aux = my_engine.stats(cont['id'], stream=False)  # this call is very slow (>~1sec)
                        if 'usage' in aux['memory_stats']:
                            stats[cont['id']]['mem_usage'] = aux['memory_stats']['usage']
                        else:
                            stats[cont['id']]['mem_usage'] = 0
                        stats[cont['id']]['cpu_usage'] = self._get_core_usage(aux)
                    except ZoeException:
                        continue

                node_stats.memory_in_use = sum([stat['mem_usage'] for stat in stats.values()])
                node_stats.cores_in_use = sum([stat['cpu_usage'] for stat in stats.values()])
        else:
            node_stats.memory_in_use = 0
            node_stats.cores_in_use = 0
Esempio n. 2
0
    def info(self) -> ClusterStats:  # pylint: disable=too-many-locals
        """Retrieve Kubernetes cluster statistics."""
        pl_status = ClusterStats()

        node_list = pykube.Node.objects(
            self.api).filter(namespace=pykube.all).iterator()
        node_dict = {}

        # Get basic information from nodes
        for node in node_list:
            nss = NodeStats(node.name)
            nss.cores_total = float(node.obj['status']['allocatable']['cpu']
                                    [1])  ## Bug found on GKE
            nss.memory_total = humanfriendly.parse_size(
                node.obj['status']['allocatable']['memory'])
            nss.labels = node.obj['metadata']['labels']
            nss.status = 'online'
            node_dict[str(socket.gethostbyname(node.name))] = nss

        # Get information from all running pods, then accumulate to nodes
        pod_list = pykube.Pod.objects(
            self.api).filter(namespace=pykube.all).iterator()
        for pod in pod_list:
            try:
                host_ip = pod.obj['status']['hostIP']
            except KeyError:
                continue
            nss = node_dict[host_ip]
            nss.container_count += 1
            spec_cont = pod.obj['spec']['containers'][0]
            if 'resources' in spec_cont:
                if 'requests' in spec_cont['resources']:
                    if 'memory' in spec_cont['resources']['requests']:
                        memory = spec_cont['resources']['requests']['memory']
                        nss.memory_reserved = nss.memory_reserved + humanfriendly.parse_size(
                            memory)
                    if 'cpu' in spec_cont['resources']['requests']:
                        cpu = spec_cont['resources']['requests']['cpu']
                        # ex: cpu could be 100m or 0.1
                        cpu_splitted = cpu.split('m')
                        if len(cpu_splitted) > 1:
                            cpu_float = int(cpu_splitted[0]) / 1000
                        else:
                            cpu_float = int(cpu_splitted[0])
                        nss.cores_reserved = round(
                            nss.cores_reserved + cpu_float, 3)

        for node_ip in node_dict:
            pl_status.nodes.append(node_dict[node_ip])

        return pl_status
Esempio n. 3
0
    def info(self) -> ClusterStats:
        """Retrieve Swarm statistics. The Docker API returns a mess difficult to parse."""
        info = self.cli.info()
        pl_status = ClusterStats()

        # SystemStatus is a list...
        idx = 0  # Role, skip
        idx += 1
        assert 'Strategy' in info["SystemStatus"][idx][0]
        pl_status.placement_strategy = info["SystemStatus"][idx][1]
        idx += 1
        assert 'Filters' in info["SystemStatus"][idx][0]
        pl_status.active_filters = [x.strip() for x in info["SystemStatus"][idx][1].split(", ")]
        idx += 1
        assert 'Nodes' in info["SystemStatus"][idx][0]
        node_count = int(info["SystemStatus"][idx][1])
        idx += 1  # At index 4 the nodes begin
        for node in range(node_count):
            idx2 = 0
            node_stats = NodeStats(info["SystemStatus"][idx + node][0].strip())
            node_stats.docker_endpoint = info["SystemStatus"][idx + node][1]
            idx2 += 1  # ID, skip
            idx2 += 1  # Status
            if info["SystemStatus"][idx + node + idx2][1] == 'Healthy':
                node_stats.status = 'online'
            else:
                node_stats.status = 'offline'
            idx2 += 1  # Containers
            node_stats.container_count = int(info["SystemStatus"][idx + node + idx2][1].split(' ')[0])
            idx2 += 1  # CPUs
            node_stats.cores_reserved = int(info["SystemStatus"][idx + node + idx2][1].split(' / ')[0])
            node_stats.cores_total = int(info["SystemStatus"][idx + node + idx2][1].split(' / ')[1])
            idx2 += 1  # Memory
            node_stats.memory_reserved = info["SystemStatus"][idx + node + idx2][1].split(' / ')[0]
            node_stats.memory_total = info["SystemStatus"][idx + node + idx2][1].split(' / ')[1]
            idx2 += 1  # Labels
            node_stats.labels = info["SystemStatus"][idx + node + idx2][1].split(', ')
            idx2 += 1  # Last update
            node_stats.last_update = info["SystemStatus"][idx + node + idx2][1]
            idx2 += 1  # Docker version
            node_stats.server_version = info["SystemStatus"][idx + node + idx2][1]

            node_stats.memory_reserved = humanfriendly.parse_size(node_stats.memory_reserved)
            node_stats.memory_total = humanfriendly.parse_size(node_stats.memory_total)

            pl_status.nodes.append(node_stats)
            idx += idx2
        pl_status.timestamp = time.time()
        return pl_status