def _digest_application_description(state: SQLManager, execution: Execution):
    """Read an application description and expand it into services that can be deployed.

    All services listed in ``execution.description['services']`` are validated
    before anything is written to ``state``:

    * the service image must be one of the names returned by
      ``list_available_images()`` for the nodes in ``node_list()``;
    * ``essential_count`` must not be greater than ``total_count``.

    On the first failed check the execution is put in error, an error message
    is set on it and nothing is inserted.

    :param state: store receiving one ``services`` record per service instance and one ``ports`` record per port of each instance
    :param execution: the execution whose application description is expanded
    :return: True when every service has been inserted, False when the description was rejected
    """
    available_images = []
    for node in node_list():
        available_images += list_available_images(node)
    image_names = [name for image in available_images for name in image['names']]

    service_descriptions = execution.description['services']

    for service_descr in service_descriptions:
        if service_descr['image'] not in image_names:
            execution.set_error()
            execution.set_error_message('image {} is not available'.format(service_descr['image']))
            return False

    # Reject inconsistent counts up front, so that a bad description never
    # leaves a partially inserted set of services behind.
    for service_descr in service_descriptions:
        if service_descr['essential_count'] > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        total_count = service_descr['total_count']
        elastic_count = total_count - essential_count

        # Instances are numbered consecutively: essential ones first, then
        # the elastic ones.
        counter = 0
        for instance_count, is_essential in ((essential_count, True), (elastic_count, False)):
            for _ in range(instance_count):
                name = "{}{}".format(service_descr['name'], counter)
                sid = state.services.insert(execution.id, name, service_descr['name'], service_descr, is_essential)

                # Ports
                for port_descr in service_descr['ports']:
                    port_internal = str(port_descr['port_number']) + '/' + port_descr['protocol']
                    state.ports.insert(sid, port_internal, port_descr)

                counter += 1

        # Internal sanity check; it can still trip if essential_count is negative.
        assert counter == total_count

    return True
def execution_terminate(scheduler: ZoeBaseScheduler, execution: Execution, reason: str):
    """Stop an execution, choosing the action from its current status.

    * running or queued: mark it as cleaning up, store ``reason`` as its error
      message and pass it to ``scheduler.terminate()``
    * cleaning up: pass it to ``scheduler.terminate()`` again
    * submitted: mark it as terminated with ``reason``
    * starting: nothing is done
    * error: call ``terminate_execution()``
    * terminated, or any other status: nothing is done
    """
    if execution.is_running or execution.status == execution.QUEUED_STATUS:
        execution.set_cleaning_up()
        execution.set_error_message(reason)
        scheduler.terminate(execution)
        return

    current_status = execution.status

    if current_status == execution.CLEANING_UP_STATUS:
        scheduler.terminate(execution)
        return

    if current_status == execution.SUBMIT_STATUS:
        execution.set_terminated(reason)
        return

    if current_status == execution.STARTING_STATUS:
        # It is unsafe to terminate executions in these statuses
        return

    if current_status == execution.ERROR_STATUS:
        terminate_execution(execution, reason)
        return

    # Already terminated (or an unhandled status): nothing to do.
    return
def service_list_to_containers(execution: Execution, service_list: List[Service], placement=None) -> str:
    """Spawn the given services of an execution on the backend, one at a time.

    Services are started sorted by their ``startup_order``. When ``placement``
    is not None it is indexed by service id and the result is assigned to the
    service as its backend host before the service is spawned.

    The first failure stops the procedure: ``terminate_execution()`` is called
    for the execution and the remaining services are not started.

    :return: 'ok' when all services were started, 'requeue' after a temporary
             failure, 'fatal' after a fatal or unexpected failure
    """
    backend = _get_backend()

    def _startup_order_of(candidate):
        return candidate.startup_order

    services_in_order = sorted(service_list, key=_startup_order_of)

    # Values made available for substitution to each service instance
    substitutions = {
        'execution_id': execution.id,
        "execution_name": execution.name,
        'user_name': execution.user_id,
        'deployment_name': get_conf().deployment_name,
    }
    for peer in execution.services:
        substitutions['dns_name#' + peer.name] = peer.dns_name

    for current in services_in_order:
        # 'self' always refers to the service that is about to be started
        substitutions['dns_name#self'] = current.dns_name

        if placement is not None:
            current.assign_backend_host(placement[current.id])

        current.set_starting()
        instance = ServiceInstance(execution, current, substitutions)

        try:
            backend_id, ip_address, ports = backend.spawn_service(instance)
        except ZoeStartExecutionRetryException as retry_error:
            log.warning('Temporary failure starting service {} of execution {}: {}'.format(
                current.id, execution.id, retry_error.message))
            current.set_error(retry_error.message)
            execution.set_error_message(retry_error.message)
            terminate_execution(execution)
            # Put the execution back in the scheduled state so it can be retried
            execution.set_scheduled()
            return "requeue"
        except ZoeStartExecutionFatalException as fatal_error:
            log.error('Fatal error trying to start service {} of execution {}: {}'.format(
                current.id, execution.id, fatal_error.message))
            current.set_error(fatal_error.message)
            execution.set_error_message(fatal_error.message)
            terminate_execution(execution)
            execution.set_error()
            return "fatal"
        except Exception as unexpected:
            log.error('Fatal error trying to start service {} of execution {}'.format(
                current.id, execution.id))
            log.exception('BUG, this error should have been caught earlier')
            execution.set_error_message(str(unexpected))
            terminate_execution(execution)
            execution.set_error()
            return "fatal"

        # Every handler above returns, so this point is reached only on success
        log.debug('Service {} started'.format(instance.name))
        current.set_active(backend_id, ip_address, ports)

    return "ok"
def _digest_application_description(state: SQLManager, execution: Execution):
    """Read an application description and expand it into services that can be deployed.

    All services listed in ``execution.description['services']`` are validated
    before anything is written to ``state``:

    * only when the configured backend is 'DockerEngine': the service image
      must be one of the names returned by ``list_available_images()`` for the
      nodes in ``node_list()``;
    * ``essential_count`` must not be greater than ``total_count``.

    On the first failed check the execution is put in error, an error message
    is set on it and nothing is inserted.

    When the configured scheduler policy is 'DYNSIZE' the execution size is
    set to the product of the minimum core and memory reservations.

    :param state: store receiving one ``services`` record per service instance and one ``ports`` record per port of each instance
    :param execution: the execution whose application description is expanded
    :return: True when every service has been inserted, False when the description was rejected
    """
    service_descriptions = execution.description['services']

    if get_conf().backend == 'DockerEngine':
        available_images = []
        for node in node_list():
            available_images += list_available_images(node)
        image_names = [name for image in available_images for name in image['names']]
        if not image_names:
            log.warning('The image list reported by the back-end is empty')

        for service_descr in service_descriptions:
            if service_descr['image'] not in image_names:
                execution.set_error()
                execution.set_error_message('image {} is not available'.format(service_descr['image']))
                return False

    # Check the counts of every service before inserting anything, so that a
    # bad description never leaves a partially inserted set of services behind.
    for service_descr in service_descriptions:
        if service_descr['essential_count'] > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        total_count = service_descr['total_count']
        elastic_count = total_count - essential_count

        # Instances are numbered consecutively: essential ones first, then
        # the elastic ones.
        counter = 0
        for instance_count, is_essential in ((essential_count, True), (elastic_count, False)):
            for _ in range(instance_count):
                name = "{}{}".format(service_descr['name'], counter)
                sid = state.services.insert(execution.id, name, service_descr['name'], service_descr, is_essential)

                # Ports
                for port_descr in service_descr['ports']:
                    port_internal = str(port_descr['port_number']) + '/' + port_descr['protocol']
                    state.ports.insert(sid, port_internal, port_descr)

                counter += 1

        # Internal sanity check; it can still trip if essential_count is negative.
        assert counter == total_count

    if get_conf().scheduler_policy == 'DYNSIZE':
        execution.set_size(execution.total_reservations.cores.min * execution.total_reservations.memory.min)

    return True