Beispiel #1
0
def start_essential(execution: Execution, placement) -> str:
    """Launch only the essential services of an execution.

    The request is logged, the execution is flagged as starting, and the
    essential services are handed to ``service_list_to_containers`` together
    with the given placement. The result of that call is returned unchanged.
    """
    debug_line = 'starting essential services for execution {}'.format(
        execution.id)
    log.debug(debug_line)
    execution.set_starting()

    essential = execution.essential_services
    return service_list_to_containers(execution, essential, placement)
 def terminate(self, execution: Execution) -> None:
     """
     Register a terminated execution with the master.

     The execution is flagged as cleaning up and appended to
     ``self.queue_termination``; nothing else happens in this call, so the
     actual processing can take place asynchronously.

     :param execution: the terminated execution
     :return: None
     """
     execution.set_cleaning_up()
     pending = self.queue_termination
     pending.append(execution)
Beispiel #3
0
def start_all(execution: Execution) -> str:
    """Start every service of an execution as containers.

    If an error occurs, some containers may already have been created and
    still need to be cleaned up.
    """
    debug_line = 'starting all services for execution {}'.format(execution.id)
    log.debug(debug_line)
    execution.set_starting()

    every_service = execution.services
    return service_list_to_containers(execution, every_service)
def execution_submit(state: SQLManager, scheduler: ZoeBaseScheduler,
                     execution: Execution):
    """Hand a newly submitted execution over to the scheduler.

    Only executions in the submit status are accepted; any other status is
    logged as a warning and ignored. An accepted execution is queued and
    passed to the scheduler only if its application description is digested
    successfully.
    """
    if execution.status == execution.SUBMIT_STATUS:
        digested = _digest_application_description(state, execution)
        if digested:
            execution.set_queued()
            scheduler.incoming(execution)
        return

    warning = 'Trying to start an execution in state {}'.format(
        execution.status)
    log.warning(warning)
Beispiel #5
0
def execution_terminate(scheduler: ZoeBaseScheduler, execution: Execution):
    """Remove an execution from the scheduler.

    Running or scheduled executions are flagged as cleaning up and passed to
    the scheduler for termination. Executions in the error or cleaning-up
    status are terminated directly. Every other status is left untouched.
    """
    if execution.is_running or execution.status == execution.SCHEDULED_STATUS:
        execution.set_cleaning_up()
        scheduler.terminate(execution)
        return

    status = execution.status
    if status in (execution.SUBMIT_STATUS, execution.STARTING_STATUS):
        # It is unsafe to terminate executions in these statuses
        return
    if status in (execution.ERROR_STATUS, execution.CLEANING_UP_STATUS):
        terminate_execution(execution)
    # Already terminated executions need no further action.
Beispiel #6
0
def _digest_application_description(state: SQLManager, execution: Execution):
    """Read an application description and expand it into services that can be deployed.

    The description is validated first: every service image must be reported
    by at least one node, and for every service ``essential_count`` must lie
    between zero and ``total_count``. Only when the whole description is valid
    are rows written: one service row per replica, plus one port row per
    declared port of that replica.

    :param state: SQL state manager; rows are inserted through
                  ``state.services`` and ``state.ports``
    :param execution: execution whose ``description['services']`` is expanded
    :return: True on success. False when validation fails; the execution is
             then put in the error state with an explanatory error message
             and nothing has been inserted.
    """
    # A set makes the per-service membership test independent of the number
    # of images reported by the nodes.
    available_images = set()
    for node in node_list():
        for image in list_available_images(node):
            available_images.update(image['names'])

    service_descriptions = execution.description['services']

    for service_descr in service_descriptions:
        if service_descr['image'] not in available_images:
            execution.set_error()
            execution.set_error_message('image {} is not available'.format(
                service_descr['image']))
            return False

    # Validate the counts of every service before the first insert, so that a
    # bad description never leaves partially inserted services behind. This
    # replaces a trailing ``assert``, which is skipped when Python runs with
    # -O and otherwise fired only after the rows had been written.
    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        if essential_count < 0:
            execution.set_error()
            execution.set_error_message(
                'essential_count is negative for service {}'.format(
                    service_descr['name']))
            return False
        if essential_count > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.
                format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        for index in range(service_descr['total_count']):
            name = "{}{}".format(service_descr['name'], index)
            # The first essential_count replicas are essential, the remaining
            # ones are elastic.
            sid = state.services.insert(execution.id, name,
                                        service_descr['name'], service_descr,
                                        index < essential_count)

            # Ports
            for port_descr in service_descr['ports']:
                port_internal = str(
                    port_descr['port_number']) + '/' + port_descr['protocol']
                state.ports.insert(sid, port_internal, port_descr)

    return True
Beispiel #7
0
def service_list_to_containers(execution: Execution,
                               service_list: List[Service],
                               placement=None) -> str:
    """Try to start a subset of the services of an execution.

    Services are started one by one, ordered by their ``startup_order``.
    When a placement is given, each service is first assigned the backend
    host found under its id.

    Returns 'ok' when every service was spawned, 'requeue' after a temporary
    failure (the execution is terminated and put back in the queue) and
    'fatal' after a fatal or unexpected failure (the execution is terminated
    and set to the error state).
    """
    backend = _get_backend()

    startup_sequence = list(service_list)
    startup_sequence.sort(key=lambda svc: svc.startup_order)

    env_subst_dict = {
        'execution_id': execution.id,
        'execution_name': execution.name,
        'user_name': execution.owner.username,
        'deployment_name': get_conf().deployment_name,
    }
    # Every service of the execution, not only the ones being started, gets a
    # DNS name entry.
    env_subst_dict.update(
        ('dns_name#' + peer.name, peer.dns_name)
        for peer in execution.services)

    for service in startup_sequence:
        env_subst_dict['dns_name#self'] = service.dns_name
        if placement is not None:
            target_host = placement[service.id]
            service.assign_backend_host(target_host)
        service.set_starting()
        instance = ServiceInstance(execution, service, env_subst_dict)

        try:
            backend_id, ip_address, ports = backend.spawn_service(instance)
        except ZoeStartExecutionRetryException as ex:
            retry_line = 'Temporary failure starting service {} of execution {}: {}'.format(
                service.id, execution.id, ex.message)
            log.warning(retry_line)
            service.set_error(ex.message)
            terminate_execution(execution, reason=ex.message)
            execution.set_queued()
            return "requeue"
        except ZoeStartExecutionFatalException as ex:
            fatal_line = 'Fatal error trying to start service {} of execution {}: {}'.format(
                service.id, execution.id, ex.message)
            log.error(fatal_line)
            service.set_error(ex.message)
            terminate_execution(execution, reason=ex.message)
            execution.set_error()
            return "fatal"
        except Exception as ex:
            unexpected_line = 'Fatal error trying to start service {} of execution {}'.format(
                service.id, execution.id)
            log.error(unexpected_line)
            log.exception('BUG, this error should have been caught earlier')
            terminate_execution(execution, reason=str(ex))
            execution.set_error()
            return "fatal"

        # Every handler above returns, so this point is reached only after a
        # successful spawn.
        log.debug('Service {} started'.format(instance.name))
        service.set_active(backend_id, ip_address, ports)

    return "ok"
def execution_terminate(scheduler: ZoeBaseScheduler, execution: Execution,
                        reason: str):
    """Remove an execution from the scheduler.

    What happens depends on the current state of the execution:

    - running or queued: flag it as cleaning up, record ``reason`` as its
      error message and pass it to the scheduler for termination;
    - cleaning up: pass it to the scheduler for termination again;
    - submitted: mark it terminated with ``reason``;
    - error: terminate it directly with ``reason``;
    - starting or already terminated: do nothing.
    """
    if execution.is_running or execution.status == execution.QUEUED_STATUS:
        execution.set_cleaning_up()
        execution.set_error_message(reason)
        scheduler.terminate(execution)
        return

    status = execution.status
    if status == execution.CLEANING_UP_STATUS:
        scheduler.terminate(execution)
        return
    if status == execution.SUBMIT_STATUS:
        execution.set_terminated(reason)
        return
    if status == execution.STARTING_STATUS:
        # It is unsafe to terminate executions in these statuses
        return
    if status == execution.ERROR_STATUS:
        terminate_execution(execution, reason)
    # Already terminated executions need no further action.
Beispiel #9
0
def execution_submit(state: SQLManager, scheduler: ZoeBaseScheduler,
                     execution: Execution):
    """Submit a new execution to the scheduler.

    The execution is marked as scheduled and passed to the scheduler only if
    its application description is digested successfully; otherwise nothing
    more is done here.
    """
    if not _digest_application_description(state, execution):
        return

    execution.set_scheduled()
    scheduler.incoming(execution)
Beispiel #10
0
def terminate_execution(execution: Execution) -> None:
    """Terminate every service of an execution, then mark it terminated."""
    owned_services = execution.services
    for svc in owned_services:  # type: Service
        terminate_service(svc)

    execution.set_terminated()
Beispiel #11
0
def terminate_execution(execution: Execution,
                        reason: Union[None, str] = None) -> None:
    """Terminate every service of an execution, then mark it terminated.

    ``reason`` is passed through unchanged to ``execution.set_terminated``.
    """
    owned_services = execution.services
    for svc in owned_services:  # type: Service
        terminate_service(svc)

    execution.set_terminated(reason)
def _digest_application_description(state: SQLManager, execution: Execution):
    """Read an application description and expand it into services that can be deployed.

    The description is validated first. When the configured backend is
    'DockerEngine', every service image must be reported by at least one
    node. For every service ``essential_count`` must lie between zero and
    ``total_count``. Only when the whole description is valid are rows
    written: one service row per replica, plus one port row per declared port
    of that replica. When the configured scheduler policy is 'DYNSIZE', the
    execution size is finally set to the product of the minimum core and
    minimum memory reservations.

    :param state: SQL state manager; rows are inserted through
                  ``state.services`` and ``state.ports``
    :param execution: execution whose ``description['services']`` is expanded
    :return: True on success. False when validation fails; the execution is
             then put in the error state with an explanatory error message
             and nothing has been inserted.
    """
    service_descriptions = execution.description['services']

    if get_conf().backend == 'DockerEngine':
        # A set makes the per-service membership test independent of the
        # number of images reported by the nodes.
        available_images = set()
        for node in node_list():
            for image in list_available_images(node):
                available_images.update(image['names'])

        if not available_images:
            log.warning('The image list reported by the back-end is empty')
        for service_descr in service_descriptions:
            if service_descr['image'] not in available_images:
                execution.set_error()
                execution.set_error_message('image {} is not available'.format(
                    service_descr['image']))
                return False

    # Validate the counts of every service before the first insert, so that
    # an invalid service never leaves the rows of the services preceding it
    # behind. This also makes the former trailing ``assert`` (skipped when
    # Python runs with -O) unnecessary.
    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        if essential_count < 0:
            execution.set_error()
            execution.set_error_message(
                'essential_count is negative for service {}'.format(
                    service_descr['name']))
            return False
        if essential_count > service_descr['total_count']:
            execution.set_error()
            execution.set_error_message(
                'total_count is less than essential_count for service {}'.
                format(service_descr['name']))
            return False

    for service_descr in service_descriptions:
        essential_count = service_descr['essential_count']
        for index in range(service_descr['total_count']):
            name = "{}{}".format(service_descr['name'], index)
            # The first essential_count replicas are essential, the remaining
            # ones are elastic.
            sid = state.services.insert(execution.id, name,
                                        service_descr['name'], service_descr,
                                        index < essential_count)

            # Ports
            for port_descr in service_descr['ports']:
                port_internal = str(
                    port_descr['port_number']) + '/' + port_descr['protocol']
                state.ports.insert(sid, port_internal, port_descr)

    if get_conf().scheduler_policy == 'DYNSIZE':
        execution.set_size(execution.total_reservations.cores.min *
                           execution.total_reservations.memory.min)

    return True