def _update_node_state(self, host_conf: DockerHostConfig, node_stats: NodeStats, get_usage_stats: bool):
    """Refresh ``node_stats`` in place with the current state of one Docker host.

    The host is contacted through a new ``DockerClient``. If the client cannot
    be created the node is marked ``'offline'`` and the method returns. If
    listing containers or querying engine info fails, the node stays
    ``'online'`` and the remaining fields are left untouched.

    :param host_conf: configuration of the Docker host to query
    :param node_stats: statistics object that is updated in place
    :param get_usage_stats: when true, also collect per-container memory and
        CPU usage (from KairosDB if ``kairosdb_enable`` is set in the
        configuration, otherwise from the Docker engine, one call per
        container); when false the in-use counters are set to 0
    """
    # Copy the configured labels so that merging the engine labels below
    # never modifies the host configuration object itself.
    node_stats.labels = set(host_conf.labels)

    try:
        my_engine = DockerClient(host_conf)
    except ZoeException as e:
        log.error(str(e))
        node_stats.status = 'offline'
        log.info('Node {} is offline'.format(host_conf.name))
        return
    else:
        node_stats.status = 'online'

    try:
        container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
        info = my_engine.info()
    except ZoeException as e:
        log.error(str(e))
        return

    node_stats.container_count = len(container_list)
    node_stats.cores_total = info['NCPU']
    node_stats.memory_total = info['MemTotal']
    if info['Labels'] is not None:
        # Sets do not support ``+=``; ``|=`` performs the intended merge.
        node_stats.labels |= set(info['Labels'])

    # A soft limit equal to the total memory is excluded from the sum.
    node_stats.memory_reserved = sum(
        cont['memory_soft_limit'] for cont in container_list
        if cont['memory_soft_limit'] != node_stats.memory_total
    )
    node_stats.cores_reserved = sum(
        cont['cpu_quota'] / cont['cpu_period'] for cont in container_list
        if cont['cpu_period'] != 0
    )

    stats = {}
    for cont in container_list:
        # Same guard as for cores_reserved: a zero CPU period must not
        # trigger a division by zero, it is reported as a limit of 0.
        if cont['cpu_period'] != 0:
            core_limit = cont['cpu_quota'] / cont['cpu_period']
        else:
            core_limit = 0
        stats[cont['id']] = {
            'core_limit': core_limit,
            'mem_limit': cont['memory_soft_limit'],
        }
    node_stats.service_stats = stats

    if not get_usage_stats:
        node_stats.memory_in_use = 0
        node_stats.cores_in_use = 0
        return

    if get_conf().kairosdb_enable:
        kdb = KairosDBInMetrics()
        for cont in container_list:
            stats[cont['id']].update(kdb.get_service_usage(cont['name']))
    else:
        for cont in container_list:
            try:
                aux = my_engine.stats(cont['id'], stream=False)  # this call is very slow (>~1sec)
                if 'usage' in aux['memory_stats']:
                    stats[cont['id']]['mem_usage'] = aux['memory_stats']['usage']
                else:
                    stats[cont['id']]['mem_usage'] = 0
                stats[cont['id']]['cpu_usage'] = self._get_core_usage(aux)
            except ZoeException:
                # Best effort: this container simply has no usage figures.
                continue

    # Containers whose usage could not be retrieved contribute 0 instead of
    # raising KeyError on the missing keys.
    node_stats.memory_in_use = sum(stat.get('mem_usage', 0) for stat in stats.values())
    node_stats.cores_in_use = sum(stat.get('cpu_usage', 0) for stat in stats.values())
def _host_subthread(self, host_config: DockerHostConfig):
    """Periodically synchronise the state of one Docker host until stopped.

    Each iteration queries the host's Docker engine, refreshes the entry for
    this host in ``self.host_stats`` (status, totals, labels, allocated and
    reserved resources, per-service limits, image list) and updates the status
    of every service that has a container on the host. Containers without a
    matching service are logged, and terminated if they are in the dead state.
    The loop then waits for the remainder of ``CHECK_INTERVAL`` and exits when
    ``self.stop`` is set.

    :param host_config: configuration of the Docker host this thread monitors
    """
    log.info("Synchro thread for host {} started".format(host_config.name))
    self.host_stats[host_config.name] = NodeStats(host_config.name)

    while True:
        time_start = time.time()
        node_stats = self.host_stats[host_config.name]
        try:
            my_engine = DockerClient(host_config)
            container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
            info = my_engine.info()
        except ZoeException as e:
            node_stats.status = 'offline'
            log.error(str(e))
            log.info('Node {} is offline'.format(host_config.name))
        else:
            if node_stats.status == 'offline':
                log.info('Node {} is now online'.format(host_config.name))
            node_stats.status = 'online'
            node_stats.container_count = info['Containers']
            node_stats.cores_total = info['NCPU']
            node_stats.memory_total = info['MemTotal']

            # set.union() returns a new set, so its result must be kept;
            # the copy keeps the host configuration itself unmodified.
            labels = set(host_config.labels)
            if info['Labels'] is not None:
                labels |= set(info['Labels'])
            node_stats.labels = labels

            # A soft limit equal to the total memory is excluded from the sum.
            node_stats.memory_allocated = sum(
                cont['memory_soft_limit'] for cont in container_list
                if cont['memory_soft_limit'] != info['MemTotal']
            )
            node_stats.cores_allocated = sum(
                cont['cpu_quota'] / cont['cpu_period'] for cont in container_list
                if cont['cpu_period'] != 0
            )

            stats = {}
            node_stats.memory_reserved = 0
            node_stats.cores_reserved = 0
            for cont in container_list:
                service = self.state.services.select(only_one=True, backend_host=host_config.name, backend_id=cont['id'])
                if service is None:
                    log.warning('Container {} on host {} has no corresponding service'.format(cont['name'], host_config.name))
                    if cont['state'] == Service.BACKEND_DIE_STATUS:
                        log.warning('Terminating dead and orphan container {}'.format(cont['name']))
                        my_engine.terminate_container(cont['id'], delete=True)
                    # Nothing else can be done without a service object.
                    continue

                self._update_service_status(service, cont)
                node_stats.memory_reserved += service.resource_reservation.memory.min
                node_stats.cores_reserved += service.resource_reservation.cores.min

                # Same guard as for cores_allocated: a zero CPU period must
                # not raise ZeroDivisionError and kill this thread.
                if cont['cpu_period'] != 0:
                    core_limit = cont['cpu_quota'] / cont['cpu_period']
                else:
                    core_limit = 0
                stats[service.id] = {
                    'core_limit': core_limit,
                    'mem_limit': cont['memory_soft_limit'],
                }
            node_stats.service_stats = stats

            images = []
            for dk_image in my_engine.list_images():
                image = {
                    'id': dk_image.attrs['Id'],
                    'size': dk_image.attrs['Size'],
                    # Copy so that the alias appended below does not touch
                    # the list object returned by the image itself.
                    'names': list(dk_image.tags),  # type: list
                }
                for name in image['names']:
                    if name[-7:] == ':latest':
                        # add an image with the name without 'latest' to fake Docker image lookup algorithm
                        image['names'].append(name[:-7])
                        break
                images.append(image)
            node_stats.images = images

        sleep_time = CHECK_INTERVAL - (time.time() - time_start)
        if sleep_time <= 0:
            log.warning('synchro thread for host {} is late by {:.2f} seconds'.format(host_config.name, sleep_time * -1))
            sleep_time = 0
        # wait() returns True when the stop event is set, ending the thread.
        if self.stop.wait(timeout=sleep_time):
            break

    log.info("Synchro thread for host {} stopped".format(host_config.name))