def start_essential(execution: Execution, placement) -> str:
    """Launch only the essential services of an execution.

    The execution is marked as starting, then its essential services are
    handed to ``service_list_to_containers`` together with ``placement``.

    :param execution: the execution whose essential services are started
    :param placement: forwarded unchanged to ``service_list_to_containers``,
        which looks up a backend host in it by service id
    :return: the status string produced by ``service_list_to_containers``
    """
    message = 'starting essential services for execution {}'.format(
        execution.id)
    log.debug(message)
    execution.set_starting()
    essential = execution.essential_services
    return service_list_to_containers(execution, essential, placement)
def terminate(self, execution: Execution) -> None:
    """
    Tell the master that an execution has been terminated.

    The execution is flagged as cleaning up and appended to
    ``self.queue_termination``; nothing else happens in this call, so the
    actual handling can be done asynchronously.

    :param execution: the terminated execution
    :return: None
    """
    execution.set_cleaning_up()
    pending = self.queue_termination
    pending.append(execution)
def start_all(execution: Execution) -> str:
    """Turn every service of an execution into containers.

    The execution is marked as starting and all of its services are passed
    to ``service_list_to_containers`` (without any placement).

    If an error occurs, some containers may already have been created and
    need to be cleaned up.

    :param execution: the execution to start
    :return: the status string produced by ``service_list_to_containers``
    """
    message = 'starting all services for execution {}'.format(execution.id)
    log.debug(message)
    execution.set_starting()
    every_service = execution.services
    return service_list_to_containers(execution, every_service)
def execution_submit(state: SQLManager, scheduler: ZoeBaseScheduler,
                     execution: Execution):
    """Hand a freshly submitted execution over to the scheduler.

    Only executions still in the submit status are accepted; any other
    status is logged as a warning and ignored. The execution is queued and
    passed to the scheduler only if its application description is digested
    successfully.

    :param state: store handed to ``_digest_application_description``
    :param scheduler: scheduler that receives the execution
    :param execution: the execution to submit
    """
    current_status = execution.status
    if current_status != execution.SUBMIT_STATUS:
        log.warning('Trying to start an execution in state {}'.format(
            current_status))
        return

    digested = _digest_application_description(state, execution)
    if not digested:
        return

    execution.set_queued()
    scheduler.incoming(execution)
def execution_terminate(scheduler: ZoeBaseScheduler, execution: Execution):
    """Remove an execution from the scheduler.

    What happens depends on the current status of the execution:

    * running or scheduled: mark it as cleaning up and ask the scheduler to
      terminate it;
    * error or cleaning up: terminate it directly with
      ``terminate_execution``;
    * any other status: do nothing.

    :param scheduler: scheduler asked to terminate a live execution
    :param execution: the execution to remove
    """
    if execution.is_running or execution.status == execution.SCHEDULED_STATUS:
        execution.set_cleaning_up()
        scheduler.terminate(execution)
        return

    status = execution.status
    if status in (execution.ERROR_STATUS, execution.CLEANING_UP_STATUS):
        terminate_execution(execution)

    # Nothing is done for the remaining statuses: it is unsafe to terminate
    # executions that are in submit or starting status, and an execution
    # that is already terminated needs no further work.
def _digest_application_description(state: SQLManager, execution: Execution):
    """Validate an application description and expand it into services.

    Two checks run before anything is written to ``state``:

    * every service image must be reported by at least one node;
    * every service must declare ``essential_count <= total_count``.

    When a check fails the execution is put in error state with an
    explanatory message and no service or port row is inserted.

    On success one service row, plus one port row per declared port, is
    inserted for each instance. Instances are named ``<service name><index>``
    and essential instances are numbered before elastic ones.

    :param state: store exposing ``services.insert`` and ``ports.insert``
    :param execution: the execution whose description is expanded
    :return: True when the services were created, False when validation failed
    """
    service_descriptions = execution.description['services']

    # Gather the image names known to any node; a set gives O(1) lookups.
    available_images = set()
    for node in node_list():
        for image in list_available_images(node):
            available_images.update(image['names'])

    for service_descr in service_descriptions:
        if service_descr['image'] not in available_images:
            execution.set_error()
            execution.set_error_message('image {} is not available'.format(
                service_descr['image']))
            return False

    # Reject inconsistent counts explicitly, before any insert: an assert
    # would fire only after rows were written and is stripped under -O.
    for service_descr in service_descriptions:
        if service_descr['essential_count'] > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.
                format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        total_count = service_descr['total_count']
        elastic_count = total_count - essential_count

        counter = 0
        # Essential instances first, then elastic ones, sharing one counter
        # so that instance names stay unique within the service.
        for is_essential, how_many in ((True, essential_count),
                                       (False, elastic_count)):
            for _ in range(how_many):
                name = "{}{}".format(service_descr['name'], counter)
                sid = state.services.insert(execution.id, name,
                                            service_descr['name'],
                                            service_descr, is_essential)
                # Ports
                for port_descr in service_descr['ports']:
                    port_internal = str(
                        port_descr['port_number']) + '/' + port_descr['protocol']
                    state.ports.insert(sid, port_internal, port_descr)
                counter += 1

        # Internal sanity check only; input validation happened above.
        assert counter == total_count

    return True
def service_list_to_containers(execution: Execution,
                               service_list: List[Service],
                               placement=None) -> str:
    """Try to start a subset of the services of an execution.

    Services are spawned through the backend one at a time, ordered by their
    ``startup_order``. The first failure stops the procedure: the whole
    execution is terminated and the failure kind is reported to the caller.

    :param execution: the execution that owns the services
    :param service_list: the services to start
    :param placement: optional object indexed by service id that yields the
        backend host each service is assigned to; skipped when None
    :return: 'ok' when every service was started, 'requeue' after a temporary
        failure (the execution is put back in queued state) and 'fatal'
        after a fatal failure (the execution is put in error state)
    """
    backend = _get_backend()
    startup_sequence = sorted(service_list, key=lambda svc: svc.startup_order)

    # Values made available for substitution when a service instance is built.
    substitutions = {
        'execution_id': execution.id,
        'execution_name': execution.name,
        'user_name': execution.owner.username,
        'deployment_name': get_conf().deployment_name,
    }
    substitutions.update({
        'dns_name#' + peer.name: peer.dns_name
        for peer in execution.services
    })

    for service in startup_sequence:
        # The same dictionary is reused; only the "self" entry changes.
        substitutions['dns_name#self'] = service.dns_name
        if placement is not None:
            service.assign_backend_host(placement[service.id])
        service.set_starting()
        instance = ServiceInstance(execution, service, substitutions)

        try:
            backend_id, ip_address, ports = backend.spawn_service(instance)
        except ZoeStartExecutionRetryException as err:
            log.warning(
                'Temporary failure starting service {} of execution {}: {}'.
                format(service.id, execution.id, err.message))
            service.set_error(err.message)
            terminate_execution(execution, reason=err.message)
            execution.set_queued()
            return "requeue"
        except ZoeStartExecutionFatalException as err:
            log.error(
                'Fatal error trying to start service {} of execution {}: {}'.
                format(service.id, execution.id, err.message))
            service.set_error(err.message)
            terminate_execution(execution, reason=err.message)
            execution.set_error()
            return "fatal"
        except Exception as err:
            log.error('Fatal error trying to start service {} of execution {}'.
                      format(service.id, execution.id))
            log.exception('BUG, this error should have been caught earlier')
            terminate_execution(execution, reason=str(err))
            execution.set_error()
            return "fatal"

        # Every handler above returns, so this point means the spawn worked.
        log.debug('Service {} started'.format(instance.name))
        service.set_active(backend_id, ip_address, ports)

    return "ok"
def execution_terminate(scheduler: ZoeBaseScheduler, execution: Execution,
                        reason: str):
    """Remove an execution from the scheduler.

    What happens depends on the current status of the execution:

    * running or queued: mark it as cleaning up, record ``reason`` as its
      error message and ask the scheduler to terminate it;
    * cleaning up: ask the scheduler to terminate it;
    * submit: mark it as terminated with ``reason``;
    * error: terminate it directly with ``terminate_execution``;
    * starting or terminated: do nothing.

    :param scheduler: scheduler asked to terminate a live execution
    :param execution: the execution to remove
    :param reason: text recorded on the execution as the termination reason
    """
    if execution.is_running or execution.status == execution.QUEUED_STATUS:
        execution.set_cleaning_up()
        execution.set_error_message(reason)
        scheduler.terminate(execution)
        return

    status = execution.status
    if status == execution.CLEANING_UP_STATUS:
        scheduler.terminate(execution)
    elif status == execution.SUBMIT_STATUS:
        execution.set_terminated(reason)
    elif status == execution.ERROR_STATUS:
        terminate_execution(execution, reason)

    # Nothing is done for the remaining statuses: it is unsafe to terminate
    # an execution that is in starting status, and an execution that is
    # already terminated needs no further work.
def execution_submit(state: SQLManager, scheduler: ZoeBaseScheduler,
                     execution: Execution):
    """Hand a new execution over to the scheduler.

    The execution is marked as scheduled and passed to the scheduler only if
    its application description is digested successfully; otherwise nothing
    further is done here.

    :param state: store handed to ``_digest_application_description``
    :param scheduler: scheduler that receives the execution
    :param execution: the execution to submit
    """
    digested = _digest_application_description(state, execution)
    if not digested:
        return

    execution.set_scheduled()
    scheduler.incoming(execution)
def terminate_execution(execution: Execution, reason=None) -> None:
    """Terminate every service of an execution, then the execution itself.

    :param execution: the execution to terminate
    :param reason: optional text describing why the execution is terminated.
        Other code in this module calls this function with a reason, so the
        argument is accepted here; it is passed to ``set_terminated`` only
        when given, which keeps calls without a reason behaving as before.
    :return: None
    """
    for service in execution.services:  # type: Service
        terminate_service(service)

    if reason is None:
        execution.set_terminated()
    else:
        execution.set_terminated(reason)
def terminate_execution(execution: Execution,
                        reason: Union[None, str] = None) -> None:
    """Terminate every service of an execution, then the execution itself.

    :param execution: the execution to terminate
    :param reason: optional text passed on to ``execution.set_terminated``
    :return: None
    """
    services = execution.services
    for svc in services:  # type: Service
        terminate_service(svc)
    execution.set_terminated(reason)
def _digest_application_description(state: SQLManager, execution: Execution):
    """Validate an application description and expand it into services.

    Checks run before anything is written to ``state``:

    * with the ``DockerEngine`` backend, every service image must be reported
      by at least one node (a warning is logged when no image is reported);
    * every service must declare ``essential_count <= total_count``.

    When a check fails the execution is put in error state with an
    explanatory message and no service or port row is inserted.

    On success one service row, plus one port row per declared port, is
    inserted for each instance. Instances are named ``<service name><index>``
    and essential instances are numbered before elastic ones. With the
    ``DYNSIZE`` scheduler policy the execution size is then set to the
    product of the minimum core and minimum memory reservations.

    :param state: store exposing ``services.insert`` and ``ports.insert``
    :param execution: the execution whose description is expanded
    :return: True when the services were created, False when validation failed
    """
    service_descriptions = execution.description['services']

    if get_conf().backend == 'DockerEngine':
        # Gather the image names known to any node; a set gives O(1) lookups.
        available_images = set()
        for node in node_list():
            for image in list_available_images(node):
                available_images.update(image['names'])
        if not available_images:
            log.warning('The image list reported by the back-end is empty')

        for service_descr in service_descriptions:
            if service_descr['image'] not in available_images:
                execution.set_error()
                execution.set_error_message('image {} is not available'.format(
                    service_descr['image']))
                return False

    # Validate the counts of every service before inserting anything, so a
    # rejected description does not leave rows of earlier services behind.
    for service_descr in service_descriptions:
        if service_descr['essential_count'] > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.
                format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        total_count = service_descr['total_count']
        elastic_count = total_count - essential_count

        counter = 0
        # Essential instances first, then elastic ones, sharing one counter
        # so that instance names stay unique within the service.
        for is_essential, how_many in ((True, essential_count),
                                       (False, elastic_count)):
            for _ in range(how_many):
                name = "{}{}".format(service_descr['name'], counter)
                sid = state.services.insert(execution.id, name,
                                            service_descr['name'],
                                            service_descr, is_essential)
                # Ports
                for port_descr in service_descr['ports']:
                    port_internal = str(
                        port_descr['port_number']) + '/' + port_descr['protocol']
                    state.ports.insert(sid, port_internal, port_descr)
                counter += 1

        # Internal sanity check only; input validation happened above.
        assert counter == total_count

    if get_conf().scheduler_policy == 'DYNSIZE':
        # Sized once, after every service has been inserted.
        execution.set_size(execution.total_reservations.cores.min *
                           execution.total_reservations.memory.min)

    return True