def list_available_images(self, node_name):
    """Return the Docker images present on the node called ``node_name``.

    Every entry of the returned list is a dict with the keys ``id``,
    ``size`` and ``names``. When no configured host carries the requested
    name an error is logged and an empty list is returned.
    """
    node_conf = next(
        (candidate for candidate in self.docker_config if candidate.name == node_name),
        None,
    )
    if node_conf is None:
        log.error('Unknown node {}, returning empty image list'.format(
            node_name))
        return []

    docker = DockerClient(node_conf)
    found = []
    for dk_image in docker.list_images():
        entry = {
            'id': dk_image.attrs['Id'],
            'size': dk_image.attrs['Size'],
            'names': dk_image.tags
        }
        tags = entry['names']
        latest = next((tag for tag in tags if tag.endswith(':latest')), None)
        if latest is not None:
            # Also expose the bare name (without ':latest') to mimic the
            # Docker image lookup algorithm; only the first match is aliased.
            tags.append(latest[:-len(':latest')])
        found.append(entry)
    return found
def preload_image(self, image_name):
    """Pull an image from a Docker registry into each configured host.

    The pull is attempted on every host of ``self.docker_config``, in
    order. A host that cannot be contacted or that fails the pull is logged
    and skipped; the call succeeds as long as one host pulled the image.

    :param image_name: image name including a version tag or digest
    :raises ZoeException: if the name cannot be parsed, if it carries no
        version tag, or if the image could not be pulled on any host
    """
    parsed_name = re.search(
        r'^(?:([^/]+)/)?(?:([^/]+)/)?([^@:/]+)(?:[@:](.+))?$', image_name)
    if parsed_name is None:
        # re.search returns None when the name does not fit the pattern
        # (for instance an empty string or too many path components).
        raise ZoeException(
            'Image name {} is not valid'.format(image_name))
    if parsed_name.group(4) is None:
        raise ZoeException(
            'Image {} does not have a version tag'.format(image_name))

    one_success = False
    for host_conf in self.docker_config:
        log.debug('Pre-loading image {} on host {}'.format(
            image_name, host_conf.name))
        time_start = time.time()
        try:
            # Creating the client is inside the try block as well, so that
            # an unreachable host does not prevent the remaining hosts
            # from being tried.
            my_engine = DockerClient(host_conf)
            my_engine.pull_image(image_name)
        except ZoeException:
            log.error('Image {} pre-loading failed on host {}'.format(
                image_name, host_conf.name))
            continue
        one_success = True
        log.debug('Image {} pre-loaded on host {} in {:.2f}s'.format(
            image_name, host_conf.name, time.time() - time_start))

    if not one_success:
        raise ZoeException('Cannot pull image {}'.format(image_name))
def _update_node_state(self, host_conf: DockerHostConfig, node_stats: NodeStats, get_usage_stats: bool):
    """Refresh ``node_stats`` with the current state of the host ``host_conf``.

    The node is marked ``offline`` when the Docker client cannot be created.
    If listing the containers or querying the engine information fails, the
    node stays ``online`` and the remaining fields are left untouched.

    :param host_conf: configuration of the Docker host to query
    :param node_stats: statistics object updated in place
    :param get_usage_stats: when true, also collect per-container memory and
        CPU usage (from KairosDB if enabled in the configuration, otherwise
        from the Docker engine); when false the usage totals are set to 0
    """
    node_stats.labels = host_conf.labels
    try:
        my_engine = DockerClient(host_conf)
    except ZoeException as e:
        log.error(str(e))
        node_stats.status = 'offline'
        log.info('Node {} is offline'.format(host_conf.name))
        return
    node_stats.status = 'online'

    try:
        container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
        info = my_engine.info()
    except ZoeException:
        return

    node_stats.container_count = len(container_list)
    node_stats.cores_total = info['NCPU']
    node_stats.memory_total = info['MemTotal']
    if info['Labels'] is not None:
        # Merge into a fresh container: an in-place "+=" would either modify
        # the labels of the shared host configuration (list) or fail (set).
        merged = list(host_conf.labels)
        for label in info['Labels']:
            if label not in merged:
                merged.append(label)
        # Keep whatever container type the host configuration uses.
        node_stats.labels = type(host_conf.labels)(merged)

    # A soft limit equal to the total memory is not counted as a reservation.
    node_stats.memory_reserved = sum(
        cont['memory_soft_limit'] for cont in container_list
        if cont['memory_soft_limit'] != node_stats.memory_total)
    node_stats.cores_reserved = sum(
        cont['cpu_quota'] / cont['cpu_period'] for cont in container_list
        if cont['cpu_period'] != 0)

    stats = {}
    for cont in container_list:
        # Same zero-period guard as the cores_reserved sum above: such a
        # container contributes no core limit instead of dividing by zero.
        if cont['cpu_period'] != 0:
            core_limit = cont['cpu_quota'] / cont['cpu_period']
        else:
            core_limit = 0
        stats[cont['id']] = {
            'core_limit': core_limit,
            'mem_limit': cont['memory_soft_limit'],
        }
    node_stats.service_stats = stats

    if not get_usage_stats:
        node_stats.memory_in_use = 0
        node_stats.cores_in_use = 0
        return

    if get_conf().kairosdb_enable:
        kdb = KairosDBInMetrics()
        for cont in container_list:
            stats[cont['id']].update(kdb.get_service_usage(cont['name']))
    else:
        for cont in container_list:
            cont_stats = stats[cont['id']]
            # Default to zero so that a container whose statistics cannot be
            # read does not break the totals computed below.
            cont_stats['mem_usage'] = 0
            cont_stats['cpu_usage'] = 0
            try:
                aux = my_engine.stats(cont['id'], stream=False)  # this call is very slow (>~1sec)
                cont_stats['mem_usage'] = aux['memory_stats'].get('usage', 0)
                cont_stats['cpu_usage'] = self._get_core_usage(aux)
            except ZoeException:
                continue

    node_stats.memory_in_use = sum(stat['mem_usage'] for stat in stats.values())
    node_stats.cores_in_use = sum(stat['cpu_usage'] for stat in stats.values())
def terminate_service(self, service: Service) -> None:
    """Stop and remove the container backing ``service``.

    A service without a backend ID cannot be terminated: an error is logged
    instead. In both cases the service backend status is set to
    ``BACKEND_DESTROY_STATUS``.
    """
    docker = DockerClient(self._get_config(service.backend_host))
    if service.backend_id is None:
        log.error('Cannot terminate service {}, since it has no backend ID'.format(service.name))
    else:
        docker.terminate_container(service.backend_id, delete=True)
    service.set_backend_status(service.BACKEND_DESTROY_STATUS)
def spawn_service(self, service_instance: ServiceInstance):
    """Spawn a service, translating a Zoe Service into a Docker container.

    :param service_instance: description of the service to start; its
        ``image_name`` must include a version tag or digest
    :returns: a tuple ``(id, external_address, ports)`` taken from the
        information returned by the Docker client
    :raises ZoeStartExecutionFatalException: if the image name is invalid or
        has no version tag, or if the Docker client reports an error
    :raises ZoeStartExecutionRetryException: if the host does not have
        enough free resources for the service
    """
    parsed_name = re.search(r'^(?:([^/]+)/)?(?:([^/]+)/)?([^@:/]+)(?:[@:](.+))?$', service_instance.image_name)
    if parsed_name is None:
        # re.search returns None when the name does not fit the pattern;
        # report it as a fatal start error instead of an AttributeError.
        raise ZoeStartExecutionFatalException('Image name {} is not valid'.format(service_instance.image_name))
    if parsed_name.group(4) is None:
        raise ZoeStartExecutionFatalException('Image {} does not have a version tag'.format(service_instance.image_name))

    conf = self._get_config(service_instance.backend_host)
    try:
        engine = DockerClient(conf)
        cont_info = engine.spawn_container(service_instance)
    except ZoeNotEnoughResourcesException as e:
        raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service_instance.name)) from e
    except ZoeException as e:
        raise ZoeStartExecutionFatalException(str(e)) from e

    return cont_info["id"], cont_info['external_address'], cont_info['ports']
def update_service(self, service, cores=None, memory=None):
    """Update a service reservation.

    Requested values larger than what the host reports (``NCPU`` and
    ``MemTotal``) are capped to the host totals. Errors creating the Docker
    client and services without a backend ID are logged and the call
    returns without updating anything.

    :param service: the service whose container must be updated
    :param cores: new core reservation, or None to leave it unspecified
    :param memory: new memory reservation, or None to leave it unspecified
    """
    conf = self._get_config(service.backend_host)
    try:
        engine = DockerClient(conf)
    except ZoeException as e:
        log.error(str(e))
        return

    if service.backend_id is None:
        log.error('Cannot update reservations for service {} ({}), since it has no backend ID'.format(service.name, service.id))
        return

    info = engine.info()
    if cores is not None and cores > info['NCPU']:
        cores = info['NCPU']
    if memory is not None and memory > info['MemTotal']:
        memory = info['MemTotal']

    # cores defaults to None, so a memory-only update must not multiply it.
    # NOTE(review): None is forwarded for cpu_quota the same way it already
    # is for mem_reservation - confirm DockerClient.update treats both alike.
    # 100000 is presumably the CPU period (in microseconds) the quota refers
    # to - TODO confirm.
    cpu_quota = None if cores is None else int(cores * 100000)
    engine.update(service.backend_id, cpu_quota=cpu_quota, mem_reservation=memory)
def _host_subthread(self, host_config: DockerHostConfig):
    """Periodically synchronise the statistics of one Docker host.

    Runs until ``self.stop`` is set. Each round queries the host, updates
    the ``NodeStats`` entry stored in ``self.host_stats`` under the host
    name, refreshes the status of the services found on the host, removes
    dead containers that have no corresponding service and rebuilds the
    list of available images. A round is started every ``CHECK_INTERVAL``
    seconds; a warning is logged when a round takes longer than that.

    :param host_config: configuration of the Docker host to monitor
    """
    log.info("Synchro thread for host {} started".format(host_config.name))

    self.host_stats[host_config.name] = NodeStats(host_config.name)

    while True:
        time_start = time.time()
        node_stats = self.host_stats[host_config.name]
        try:
            my_engine = DockerClient(host_config)
            container_list = my_engine.list(
                only_label={
                    'zoe_deployment_name': get_conf().deployment_name
                })
            info = my_engine.info()
        except ZoeException as e:
            node_stats.status = 'offline'
            log.error(str(e))
            log.info('Node {} is offline'.format(host_config.name))
        else:
            if node_stats.status == 'offline':
                log.info('Node {} is now online'.format(host_config.name))
            node_stats.status = 'online'
            node_stats.container_count = info['Containers']
            node_stats.cores_total = info['NCPU']
            node_stats.memory_total = info['MemTotal']

            labels = host_config.labels
            if info['Labels'] is not None:
                # union() returns a new set: its result has to be stored,
                # otherwise the labels reported by the engine are lost.
                labels = labels.union(info['Labels'])
            node_stats.labels = labels

            # A soft limit equal to the total memory is not counted.
            node_stats.memory_allocated = sum(
                cont['memory_soft_limit'] for cont in container_list
                if cont['memory_soft_limit'] != info['MemTotal'])
            node_stats.cores_allocated = sum(
                cont['cpu_quota'] / cont['cpu_period']
                for cont in container_list if cont['cpu_period'] != 0)

            stats = {}
            node_stats.memory_reserved = 0
            node_stats.cores_reserved = 0
            for cont in container_list:
                service = self.state.services.select(
                    only_one=True,
                    backend_host=host_config.name,
                    backend_id=cont['id'])
                if service is None:
                    log.warning(
                        'Container {} on host {} has no corresponding service'
                        .format(cont['name'], host_config.name))
                    if cont['state'] == Service.BACKEND_DIE_STATUS:
                        log.warning(
                            'Terminating dead and orphan container {}'.
                            format(cont['name']))
                        my_engine.terminate_container(cont['id'], delete=True)
                    # Orphan containers never contribute to the service stats.
                    continue

                self._update_service_status(service, cont)
                node_stats.memory_reserved += service.resource_reservation.memory.min
                node_stats.cores_reserved += service.resource_reservation.cores.min

                # Same zero-period guard as the cores_allocated sum above;
                # an unguarded division would kill this thread.
                if cont['cpu_period'] != 0:
                    core_limit = cont['cpu_quota'] / cont['cpu_period']
                else:
                    core_limit = 0
                stats[service.id] = {
                    'core_limit': core_limit,
                    'mem_limit': cont['memory_soft_limit']
                }
            node_stats.service_stats = stats

            node_stats.images = []
            for dk_image in my_engine.list_images():
                image = {
                    'id': dk_image.attrs['Id'],
                    'size': dk_image.attrs['Size'],
                    'names': dk_image.tags  # type: list
                }
                for name in image['names']:
                    if name.endswith(':latest'):
                        # add an image with the name without 'latest' to fake Docker image lookup algorithm
                        image['names'].append(name[:-len(':latest')])
                        break
                node_stats.images.append(image)

        sleep_time = CHECK_INTERVAL - (time.time() - time_start)
        if sleep_time <= 0:
            log.warning(
                'synchro thread for host {} is late by {:.2f} seconds'.
                format(host_config.name, sleep_time * -1))
            sleep_time = 0
        # wait() returns true when the stop event has been set.
        if self.stop.wait(timeout=sleep_time):
            break

    log.info("Synchro thread for host {} stopped".format(host_config.name))
def service_log(self, service: Service):
    """Fetch the log of the container backing ``service``.

    The request goes to the Docker client of the host the service runs on;
    the two positional flags are forwarded to ``DockerClient.logs`` as they
    are and their meaning is defined there.
    """
    host_conf = self._get_config(service.backend_host)
    return DockerClient(host_conf).logs(service.backend_id, True, False)