Beispiel #1
0
def swarm_events_thread(args):
    """Listen for Swarm events forever, restarting the listener whenever it stops.

    A SwarmClient is built from ``args`` and its ``event_listener`` is called with
    ``main_callback``. Any exception raised by the listener is logged and the
    listener is started again after a short pause.

    :param args: configuration object handed as-is to the SwarmClient constructor
    """
    # Local import so that this function does not depend on the module's import block.
    import time

    swarm = SwarmClient(args)
    while True:
        try:
            swarm.event_listener(main_callback)
        except Exception:
            log.exception('Something bad happened')
        # Without a pause, a persistent failure (e.g. Swarm unreachable) would turn this
        # loop into a busy loop that floods the log.
        time.sleep(1)
Beispiel #2
0
 def ip_address(self):
     """Return the service IP address on the configured overlay network.

     Swarm is queried on every call because the IP address changes outside our control.
     When the docker status of the service is not the start status, an empty dict is
     returned instead and Swarm is not contacted.
     """
     if self.docker_status == self.DOCKER_START_STATUS:
         addresses = SwarmClient(get_conf()).inspect_container(self.docker_id)['ip_address']
         return addresses[get_conf().overlay_network_name]
     # The container is not running: there is no address to report.
     return {}
Beispiel #3
0
def guest_check_thread(args):
    """Periodically reconcile the containers listed by Swarm with Zoe's state.

    On every iteration:

    * containers labelled with this deployment's name are listed;
    * for each one whose status contains ``'Exited'``, ``container_died()`` is called
      with the ``zoe.service.id`` label; if that raises ``ZoeAPIException`` the
      container is terminated and deleted from Swarm;
    * ``check_guests()`` is called with the Swarm client;
    * the thread sleeps for the configured ``loop_time``.

    Exceptions raised during an iteration are logged and the loop goes on.

    :param args: configuration object handed as-is to the SwarmClient constructor
    """
    swarm = SwarmClient(args)

    while True:
        try:
            zoe_containers = swarm.list({'zoe.deployment_name': get_conf().deployment_name})
            for c in zoe_containers:
                if 'Exited' not in c['status']:
                    continue
                zoe_id = c['labels']['zoe.service.id']
                try:
                    container_died(zoe_id)
                except ZoeAPIException:
                    log.warning('Container {} has died, but Zoe does not know anything about it, deleting'.format(c['name']))
                    swarm.terminate_container(c['id'], delete=True)

            check_guests(swarm)
        except Exception:
            log.exception('Something bad happened')

        # The sleep lives outside the try block on purpose: when an iteration fails we
        # still have to wait, otherwise a persistent error becomes a busy loop.
        time.sleep(get_conf().loop_time)
Beispiel #4
0
    def loop(self):
        """Serve the commands received as JSON messages on the ZeroMQ socket, forever.

        Every message must contain a ``command`` key. Supported commands:

        * ``execution_start`` (needs ``exec_id``): marks the execution as scheduled,
          replies OK, then submits it to the execution manager.
        * ``execution_terminate`` (needs ``exec_id``): marks the execution as cleaning up,
          replies OK, then asks the execution manager to terminate it.
        * ``execution_delete`` (needs ``exec_id``): deletes the execution if it exists and
          replies OK in any case.
        * ``service_inspect`` (needs ``service_id``): replies with the Swarm inspection
          data of the service container.

        Unknown commands and unknown execution/service IDs get an error reply.

        :raises ZoeException: if a command was processed without any reply being sent
        """
        assert isinstance(config.singletons['sql_manager'],
                          zoe_lib.sql_manager.SQLManager)
        while True:
            message = self.zmq_s.recv_json()
            # NOTE(review): the _reply_* helpers presumably set this flag back to True; confirm.
            self.debug_has_replied = False
            start_time = time.time()
            command = message['command']

            if command == 'execution_start':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                if execution is None:
                    self._reply_error('Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_scheduled()
                    # Reply before submitting, the submission happens after the client got its answer
                    self._reply_ok()
                    zoe_master.execution_manager.execution_submit(execution)
            elif command == 'execution_terminate':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                if execution is None:
                    self._reply_error('Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_cleaning_up()
                    self._reply_ok()
                    zoe_master.execution_manager.execution_terminate(execution)
            elif command == 'execution_delete':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                # Deleting an execution that does not exist is not an error
                if execution is not None:
                    zoe_master.execution_manager.execution_delete(execution)
                self._reply_ok()
            elif command == 'service_inspect':
                service_id = message['service_id']
                service = config.singletons['sql_manager'].service_list(
                    id=service_id, only_one=True)
                if service is None:
                    self._reply_error('no such service')
                else:
                    swarm = SwarmClient(config.get_conf())
                    info = swarm.inspect_container(service.docker_id)
                    self._reply_ok(info)
            else:
                log.error('Unknown command: {}'.format(command))
                self._reply_error('unknown command')

            if not self.debug_has_replied:
                # Send a reply anyway before raising, then report the offending command
                self._reply_error('bug')
                raise ZoeException('BUG: command {} does not fill a reply'.format(command))

            config.singletons['metric'].metric_api_call(start_time, command)
Beispiel #5
0
 def service_logs(self, uid, role, service_id, stream=True):
     """Fetch from Swarm the logs of the container backing a service.

     The service must exist, the caller must either own it or have the 'admin' role,
     and the service must have a docker ID; otherwise the corresponding
     zoe_api exception (not found / auth / not found) is raised.
     The ``stream`` flag is forwarded unchanged to the Swarm client.
     """
     svc = self.sql.service_list(id=service_id, only_one=True)
     if svc is None:
         raise zoe_api.exceptions.ZoeNotFoundException('No such service')

     # Owners can read their own logs, admins can read everything
     if not (svc.user_id == uid or role == 'admin'):
         raise zoe_api.exceptions.ZoeAuthException()

     if svc.docker_id is None:
         raise zoe_api.exceptions.ZoeNotFoundException('Container is not running')

     return SwarmClient(get_conf()).logs(svc.docker_id, stream)
Beispiel #6
0
def terminate_execution(execution: Execution) -> None:
    """Terminate and delete every container of an execution, updating states along the way.

    The execution is first marked as cleaning up. Each service that has a docker ID is
    marked as terminating, its container is terminated and deleted through Swarm, and
    the service is then marked inactive. Finally the execution is marked as terminated.
    """
    execution.set_cleaning_up()
    client = SwarmClient(get_conf())
    for svc in execution.services:
        assert isinstance(svc, Service)
        if svc.docker_id is None:
            # No container is associated with this service, nothing to clean up
            continue
        svc.set_terminating()
        client.terminate_container(svc.docker_id, delete=True)
        svc.set_inactive()
        log.debug('Service {} terminated'.format(svc.name))
    execution.set_terminated()
Beispiel #7
0
 def run(self):
     """The thread loop.

     Subscribes ``self._event_cb`` to the Swarm event stream. Whenever the listener
     returns or fails, the failure (if any) is logged and the subscription is retried
     after one second.
     """
     log.info("Monitor thread started")
     swarm = SwarmClient(get_conf())
     while True:
         try:
             swarm.event_listener(self._event_cb)
         except Exception:
             # Only regular errors are swallowed: a bare except would also trap
             # SystemExit and KeyboardInterrupt and make the thread impossible to stop.
             log.exception('Exception in monitor thread')
         time.sleep(1)  # Usually we got disconnected, so wait a bit before retrying
Beispiel #8
0
def terminate_execution(execution: Execution) -> None:
    """Terminate an execution, making sure no containers are left in Swarm.

    State transitions: the execution goes to cleaning up, each service owning a
    container goes through terminating and inactive, then the execution is terminated.
    """
    execution.set_cleaning_up()
    swarm_client = SwarmClient(get_conf())
    for current in execution.services:
        assert isinstance(current, Service)
        if current.docker_id is None:
            # Services without a docker ID have no container to remove
            continue
        current.set_terminating()
        swarm_client.terminate_container(current.docker_id, delete=True)
        current.set_inactive()
        log.debug('Service {} terminated'.format(current.name))
    execution.set_terminated()
Beispiel #9
0
def main():
    """The main entrypoint function.

    Loads the configuration, sets up logging, validates the ZApp description read from
    ``args.jsonfile``, registers a test execution in a FakeSQLManager, starts its
    containers, prints the last ten log lines of each service and then waits for CTRL-C.
    The execution is always terminated before returning, even if an error or a CTRL-C
    happens while the containers are starting or their logs are being fetched.
    """
    conf = load_configuration()
    config.load_configuration(conf)
    args = config.get_conf()
    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level, format=LOG_FORMAT)

    logging.getLogger('kazoo').setLevel(logging.WARNING)
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
    logging.getLogger('docker').setLevel(logging.INFO)
    logging.getLogger("tornado").setLevel(logging.DEBUG)

    state = FakeSQLManager()

    zapp_description = json.load(args.jsonfile)

    print('Validating zapp description...')
    zoe_lib.applications.app_validate(zapp_description)

    exec_id = state.execution_new('test', 'fake_user', zapp_description)
    e = state.execution_list(only_one=True, id=exec_id)
    _digest_application_description(state, e)

    print('Zapp digested, starting containers...')
    # From here on containers may exist in Swarm: whatever happens, clean them up.
    try:
        execution_to_containers(e)

        print('Giving the containers a few seconds to start...')
        time.sleep(5)

        swarm = SwarmClient(args)
        for service in e.services:
            print("Service {}, docker ID: {}".format(service.name, service.docker_id))
            logs = swarm.logs(service.docker_id, False)
            logs = logs.decode('utf-8').split('\n')
            for log_line in logs[-10:]:
                print(log_line)

        print("Execution as been started, press CTRL-C to terminate it")
        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            pass
    finally:
        print('Terminating...')
        terminate_execution(e)
Beispiel #10
0
def _spawn_service(execution: Execution, service: Service, env_subst_dict: dict):
    """Start the Swarm container of one service and mark the service as active.

    The container options (log address, name, memory limit, network, labels, restart
    policy, environment, exposed ports, volumes, workspaces and command) are built from
    the service description and the configuration. After the container is spawned, it
    is connected to the additional networks listed in the description, if any.

    :param execution: the execution the service belongs to
    :param service: the service to start
    :param env_subst_dict: values used to fill the ``{name}`` templates found in the
        environment variables and in the command
    :raises ZoeStartExecutionFatalException: if a template refers to an unknown variable,
        the Swarm client cannot be created or a network connection fails
    :raises ZoeStartExecutionRetryException: if Swarm fails to spawn the container
    """
    copts = DockerContainerOptions()
    copts.gelf_log_address = get_conf().gelf_address
    copts.name = service.dns_name
    copts.set_memory_limit(service.description['required_resources']['memory'])
    copts.network_name = get_conf().overlay_network_name
    copts.labels = {
        'zoe.execution.name': execution.name,
        'zoe.execution.id': str(execution.id),
        'zoe.service.name': service.name,
        'zoe.service.id': str(service.id),
        'zoe.owner': execution.user_id,
        'zoe.deployment_name': get_conf().deployment_name,
        'zoe.type': 'app_service'
    }
    copts.labels['zoe.monitor'] = 'true' if service.description['monitor'] else 'false'
    copts.restart = not service.description['monitor']  # Monitor containers should not restart

    # Template strings in the environment variables are filled with the values of env_subst_dict
    for env_name, env_value in service.description['environment']:
        try:
            env_value = env_value.format(**env_subst_dict)
        except KeyError as e:
            raise ZoeStartExecutionFatalException("unknown variable in expression {}".format(env_value)) from e
        copts.add_env_variable(env_name, env_value)

    for p in service.description['ports']:
        if p['expose']:
            copts.ports.append(p['port_number'])  # FIXME UDP ports?

    if 'volumes' in service.description:
        for path, mount_point, readonly in service.description['volumes']:
            copts.add_volume_bind(path, mount_point, readonly)

    for wks in singletons['workspace_managers']:
        assert isinstance(wks, zoe_master.workspace.base.ZoeWorkspaceBase)
        if wks.can_be_attached():
            copts.add_volume_bind(wks.get_path(execution.user_id), wks.get_mountpoint(), False)

    # The same dictionary is used for templates in the command, and an unknown variable
    # is reported the same way as for the environment instead of leaking a KeyError
    if 'command' in service.description:
        command = service.description['command']
        try:
            command = command.format(**env_subst_dict)
        except KeyError as e:
            raise ZoeStartExecutionFatalException("unknown variable in expression {}".format(command)) from e
        copts.set_command(command)

    try:
        swarm = SwarmClient(get_conf())
    except Exception as e:
        raise ZoeStartExecutionFatalException(str(e)) from e

    try:
        cont_info = swarm.spawn_container(service.description['docker_image'], copts)
    except ZoeException as e:
        # A failed spawn is reported as retryable, unlike the other failures of this function
        raise ZoeStartExecutionRetryException(str(e)) from e

    service.set_active(cont_info["docker_id"])

    if 'networks' in service.description:
        for net in service.description['networks']:
            try:
                swarm.connect_to_network(service.docker_id, net)
            except ZoeException as e:
                raise ZoeStartExecutionFatalException(str(e)) from e
Beispiel #11
0
    def __init__(self):
        """Initialise the parent class with the name 'stats' and daemon=True, then create the Swarm client."""
        super().__init__(name='stats', daemon=True)
        swarm_client = SwarmClient(get_conf())
        self.swarm = swarm_client

        # No Swarm statistics are stored at construction time
        self._swarm_stats = None
Beispiel #12
0
def _spawn_service(execution: Execution, service: Service,
                   env_subst_dict: dict):
    """Start the Swarm container of one service and mark the service as active.

    The container options (log address, name, memory limit, network, labels, restart
    policy, environment, exposed ports, volumes, constraints, workspace and command)
    are built from the service description and the configuration. After the container
    is spawned, it is connected to the additional networks listed in the description.

    :param execution: the execution the service belongs to
    :param service: the service to start
    :param env_subst_dict: values used to fill the ``{name}`` templates; it is passed to
        ``_gen_environment`` and used for the command
    :raises ZoeStartExecutionFatalException: if the command template refers to an unknown
        variable, the Swarm client cannot be created or a network connection fails
    :raises ZoeStartExecutionRetryException: if Swarm fails to spawn the container
    """
    copts = DockerContainerOptions()
    copts.gelf_log_address = get_conf().gelf_address
    copts.name = service.dns_name
    copts.set_memory_limit(service.description['required_resources']['memory'])
    copts.network_name = get_conf().overlay_network_name
    copts.labels = {
        'zoe.execution.name': execution.name,
        'zoe.execution.id': str(execution.id),
        'zoe.service.name': service.name,
        'zoe.service.id': str(service.id),
        'zoe.owner': execution.user_id,
        'zoe.deployment_name': get_conf().deployment_name,
        'zoe.type': 'app_service'
    }
    copts.labels['zoe.monitor'] = 'true' if service.description['monitor'] else 'false'
    # Monitor containers should not restart
    copts.restart = not service.description['monitor']

    _gen_environment(service, env_subst_dict, copts)

    for p in service.description['ports']:
        if p['expose']:
            copts.ports.append(p['port_number'])  # FIXME UDP ports?

    if 'volumes' in service.description:
        for path, mount_point, readonly in service.description['volumes']:
            copts.add_volume_bind(path, mount_point, readonly)

    if 'constraints' in service.description:
        for constraint in service.description['constraints']:
            copts.add_constraint(constraint)

    fswk = ZoeFSWorkspace()
    if fswk.can_be_attached():
        copts.add_volume_bind(fswk.get_path(execution.user_id),
                              fswk.get_mountpoint(), False)

    # The same dictionary is used for templates in the command. An unknown variable is
    # turned into a fatal start error instead of leaking a bare KeyError to the caller.
    if 'command' in service.description:
        command = service.description['command']
        try:
            command = command.format(**env_subst_dict)
        except KeyError as e:
            raise ZoeStartExecutionFatalException(
                "unknown variable in expression {}".format(command)) from e
        copts.set_command(command)

    try:
        swarm = SwarmClient(get_conf())
    except Exception as e:
        raise ZoeStartExecutionFatalException(str(e)) from e

    try:
        cont_info = swarm.spawn_container(service.description['docker_image'],
                                          copts)
    except (ZoeException, ZoeLibException) as e:
        # A failed spawn is reported as retryable, whichever of the two families it comes from
        raise ZoeStartExecutionRetryException(str(e)) from e

    service.set_active(cont_info["docker_id"])

    if 'networks' in service.description:
        for net in service.description['networks']:
            try:
                swarm.connect_to_network(service.docker_id, net)
            except ZoeException as e:
                raise ZoeStartExecutionFatalException(str(e)) from e