Example #1
def run_service(service_name, image_name, command, restart_condition,
                networks, s_and_t, ports, mode='global', replicas=1):
    docker = MANAGER_CLIENT
    from dsp.image.valify import valify
    #if not valify(image_name):
    #    return "The image specified by the application could not be found!"
    # Collection of Mount objects
    mounts = []
    image_name = '192.168.123.251/' + image_name
    for s_t in s_and_t:
        read_only = s_t.get('read_only', '') == 'yes'
        m = Mount(source=s_t.get('source', ''), target=s_t.get('target', ''), read_only=read_only)
        mounts.append(m)
        print(s_t.get('source', ''))
    #containSpec = ContainerSpec(image=image_name, mounts=mounts, command=command, tty=True, open_stdin=True)
    containSpec = ContainerSpec(image=image_name, command=command, tty=True, open_stdin=True)

    # Declare the TaskTemplate ---> task
    # restart_condition ---> none, on-failure, or any
    restart_policy = RestartPolicy(condition=restart_condition)
    task = TaskTemplate(container_spec=containSpec, restart_policy=restart_policy)
    # Service start mode
    if mode == "global":
        replicas = None
    service_mode = ServiceMode(mode=mode, replicas=replicas)
    # Endpoint spec, including load-balancing mode settings, etc.
    end_point = EndpointSpec(ports=ports)

    service_id = docker.create_service(task_template=task, name=service_name,
                                       mode=service_mode, networks=networks, endpoint_spec=end_point)
    return service_id
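Note: this snippet assumes the low-level docker.APIClient API (whose create_service accepts a task_template) plus the service types from docker.types; MANAGER_CLIENT and the registry prefix are the example's own. A minimal sketch of the assumed setup:

import docker
from docker.types import (ContainerSpec, EndpointSpec, Mount,
                          RestartPolicy, ServiceMode, TaskTemplate)

# Assumed: a low-level client pointed at a Swarm manager node.
MANAGER_CLIENT = docker.APIClient(base_url='tcp://192.168.123.251:2375')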
Example #2
def createService(self, service_name, image, ports, restart, cpu_limit,
                  cpu_reservation, mem_reservation, mem_limit, volumes,
                  environment, labels, command, networks):
    try:
        self.is_deploying = True
        resources = DockerResources(mem_limit=mem_limit,
                                    mem_reservation=mem_reservation,
                                    cpu_limit=cpu_limit,
                                    cpu_reservation=cpu_reservation)
        restart_policy = RestartPolicy(
            condition=restart['name'],
            delay=10,
            max_attempts=restart['MaximumRetryCount'])
        endpoints = EndpointSpec(mode='vip', ports=ports)
        self.docker_client.services.create(image=image,
                                           name=service_name,
                                           hostname=service_name,
                                           restart_policy=restart_policy,
                                           endpoint_spec=endpoints,
                                           resources=resources,
                                           mounts=volumes,
                                           env=environment,
                                           labels=labels,
                                           command=command,
                                           networks=networks)
        logging.info("{0} component deployed".format(service_name))
        self.is_deploying = False
        return resources
    except Exception as e:
        print(e)
        self.is_deploying = False
        return None
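Note on units: docker-py forwards RestartPolicy values to the Engine API, which measures restart delays in nanoseconds, so delay=10 above is effectively immediate (compare delay=5000000000 in Example #8 below). A hedged sketch of an explicit conversion:

from docker.types import RestartPolicy

NANOS_PER_SECOND = 1_000_000_000
# 10-second delay between restart attempts, at most 3 attempts.
restart_policy = RestartPolicy(condition='on-failure',
                               delay=10 * NANOS_PER_SECOND,
                               max_attempts=3)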
Example #3
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get('EG_DOCKER_NETWORK', 'bridge')

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['EG_RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop('PATH')  # Let the image PATH be used.  Since this is relative to images, we're probably safe.

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')
        kernel_service = client.services.create(image_name,
                                               name=container_name,
                                               endpoint_spec=endpoint_spec,
                                               restart_policy=restart_policy,
                                               env=param_env,
                                               container_labels=labels,
                                               labels=labels,
                                               #mounts=mounts,   # Enable if necessary
                                               networks=networks)
    else:
        volumes = {'/usr/local/share/jupyter/kernels': {'bind': '/usr/local/share/jupyter/kernels', 'mode': 'ro'}}
        kernel_container = client.containers.run(image_name,
                                                 name=container_name,
                                                 hostname=container_name,
                                                 environment=param_env,
                                                 labels=labels,
                                                 remove=remove_container,
                                                 network=docker_network,
                                                 #volumes=volumes,  # Enable if necessary
                                                 detach=True)
Example #4
def createService(image, command, cpuRequirements, name, labels, selectedNodeId):
    client = docker.from_env()
    # Resources counts CPUs in NanoCPUs: 1 CPU == 1e9
    cpuRequirementsInNanoCpus = cpuRequirements * 1000000000

    client.services.create(
        image,
        command,
        constraints=["node.role == worker", "node.id == " + selectedNodeId],
        mode=ServiceMode("replicated", 1),
        restart_policy=RestartPolicy(condition='none'),
        resources=Resources(cpu_reservation=cpuRequirementsInNanoCpus),
        name=name,
        labels={"instance_name": labels.get("instance_name")},
        hostname=selectedNodeId
    )
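A hedged usage sketch for the helper above; picking the first worker node is illustrative only:

import docker

# Hypothetical call: reserve one full CPU (1e9 NanoCPUs) on a chosen worker.
client = docker.from_env()
worker = client.nodes.list(filters={'role': 'worker'})[0]
createService('alpine', ['sleep', '60'], 1, 'demo-task',
              {'instance_name': 'demo'}, worker.id)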
Example #5
    def _dump_services(self, files, namereplace, spec, yaml_data):
        secrets = []
        for secret in spec['TaskTemplate']['ContainerSpec'].get('Secrets', []):
            secret_name = filename = secret['SecretName']
            secret_orig_name = namereplace.sub('', secret['SecretName'])
            while filename in files:
                filename = inc_name(filename)
            secrets.append({
                "source": filename,
                "target": secret['File']['Name'],
                "uid": secret['File']['UID'],
                "gid": secret['File']['GID'],
                "mode": secret['File']['Mode'],
            })

            yaml_data['secrets'][secret_orig_name] = {
                'file': filename,
                'name': secret_orig_name
            }
            try:
                remanent = self.docker.services.list(filters=dict(name='maiev_get_secret'))
                if remanent:
                    remanent[0].remove()
            except docker.errors.APIError:
                pass
            s = self.docker.services.create(
                'bash', command=['cat', '/tmp/secret'],
                name='maiev_get_secret',
                restart_policy=RestartPolicy(RestartConditionTypesEnum.ON_FAILURE, 5, 1),
                secrets=[SecretReference(secret['SecretID'], secret_name, '/tmp/secret')]
            )
            time.sleep(0.1)
            cnt = 0
            # Wait until the one-shot task finishes (its desired state leaves 'running').
            while len(s.tasks({'desired-state': 'running'})) > 0:
                time.sleep(0.5)
                cnt += 1
                if cnt > 60:
                    raise Exception(
                        "unable to retrieve secret %s. task did not start: %s" % (
                            secret_name,
                            s.tasks({'desired-state': 'running'})
                        )
                    )

            files[filename] = b"".join(s.logs(stdout=True, follow=False)).decode('utf-8')
            s.remove()
        return secrets
Example #6
class DockerWrapper:
    DOCKER_RESTART_POLICY = RestartPolicy(condition='none')

    def __init__(self, client: docker.DockerClient, config: RunnerConfig,
                 authenticator: AuthenticationFactory):
        self._client = client
        self._config = config
        self._authenticator = authenticator
        self._logger = LogManager(__name__)

    def start_task(self, job_id: str, image: str, task_name: str,
                   task_args: Iterable[str]) -> str:
        self._logger.info('Starting task {tn} for job {ji}'.format(
            tn=task_name, ji=job_id))
        run_env = [
            'SWARMER_ADDRESS=http://{addr}:{port}/result/{ident}'.format(
                addr=self._config.host, port=self._config.port, ident=job_id),
            'TASK_NAME={task}'.format(task=task_name),
            'SWARMER_JOB_ID={ident}'.format(ident=job_id)
        ]
        if any(task_args):
            run_env += [
                'RUN_ARGS={args}'.format(
                    args=','.join([str(a) for a in task_args]))
            ]

        svc = self._get_client().services.create(
            image,
            env=run_env,
            restart_policy=self.DOCKER_RESTART_POLICY,
            networks=[self._config.network],
            name='{id}-{name}'.format(id=job_id, name=task_name))
        return svc.id

    def remove_service(self, service_ids: Iterable[str]):
        for sid in service_ids:
            svc = self._client.services.get(sid)
            if svc:
                svc.remove()

    def _get_client(self):
        if self._authenticator and self._authenticator.any_require_login:
            self._authenticator.perform_logins(self._client)
        return self._client
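A hedged wiring sketch; the RunnerConfig construction and values below are purely illustrative, inferred from the attributes the class reads (host, port, network):

import docker

# Hypothetical setup -- not the project's own bootstrap code.
client = docker.from_env()
config = RunnerConfig(host='swarmer', port=8500, network='swarmer-net')
wrapper = DockerWrapper(client, config, authenticator=None)
service_id = wrapper.start_task('job-1', 'worker-image', 'checksum', ['file.bin'])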
Example #7
def launch_docker_kernel(kernel_id, port_range, response_addr, public_key,
                         spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get(
        'DOCKER_NETWORK', os.environ.get('EG_DOCKER_NETWORK', 'bridge'))

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['PORT_RANGE'] = port_range
    param_env['PUBLIC_KEY'] = public_key
    param_env['RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop(
        'PATH'
    )  # Let the image PATH be used.  Since this is relative to images, we're probably safe.

    user = param_env.get('KERNEL_UID')
    group = param_env.get('KERNEL_GID')

    # setup common args
    kwargs = dict()
    kwargs['name'] = container_name
    kwargs['hostname'] = container_name
    kwargs['user'] = user
    kwargs['labels'] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append(
            "/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro"
        )
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')

        # finish args setup
        kwargs['env'] = param_env
        kwargs['endpoint_spec'] = endpoint_spec
        kwargs['restart_policy'] = restart_policy
        kwargs['container_labels'] = labels
        kwargs['networks'] = networks
        kwargs['groups'] = [group, '100']
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['workdir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['mounts'] = mounts   # Enable if necessary
        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        volumes = {
            '/usr/local/share/jupyter/kernels': {
                'bind': '/usr/local/share/jupyter/kernels',
                'mode': 'ro'
            }
        }

        # finish args setup
        kwargs['environment'] = param_env
        kwargs['remove'] = remove_container
        kwargs['network'] = docker_network
        kwargs['group_add'] = [group, '100']
        kwargs['detach'] = True
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['working_dir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['volumes'] = volumes   # Enable if necessary
        # print("container args: {}".format(kwargs))  # useful for debug
        kernel_container = client.containers.run(image_name, **kwargs)
Example #8
        c.CoursewareUserSpawner.group_resources = group_resources

        admin_config = resource_config.get('admin')
        if admin_config is not None:
            r = resources(admin_config)
            c.CoursewareUserSpawner.admin_resources = r
        default_config = resource_config.get('default')
        if default_config is not None:
            r = resources(default_config)
            c.CoursewareUserSpawner.default_resources = r

restart_max_attempts = int(os.environ.get('SPAWNER_RESTART_MAX_ATTEMPTS', '10'))
extra_task_spec = {
    'restart_policy': RestartPolicy(
        condition='any',
        delay=5000000000,
        max_attempts=restart_max_attempts
    )
}
if 'SPAWNER_CONSTRAINTS' in os.environ:
    placement_constraints = os.environ['SPAWNER_CONSTRAINTS']
    extra_task_spec.update({
        'placement': Placement(
            constraints=[x.strip() for x in placement_constraints.split(';')]
        )
    })
c.SwarmSpawner.extra_task_spec = extra_task_spec

if 'JUPYTERHUB_SINGLEUSER_APP' in os.environ:
    c.Spawner.environment = {
        'JUPYTERHUB_SINGLEUSER_APP': os.environ['JUPYTERHUB_SINGLEUSER_APP']
    }

Example #9
def launch_docker_kernel(kernel_id, port_range, response_addr, public_key, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get("DOCKER_NETWORK", os.environ.get("EG_DOCKER_NETWORK", "bridge"))

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["PORT_RANGE"] = port_range
    param_env["PUBLIC_KEY"] = public_key
    param_env["RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and EG_CLIENT_ENVS)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop(
        "PATH"
    )  # Let the image PATH be used.  Since this is relative to images, we're probably safe.

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # setup common args
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["hostname"] = container_name
    kwargs["user"] = user
    kwargs["labels"] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        # mounts = list()  # Enable if necessary
        # mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # finish args setup
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts   # Enable if necessary
        # print("service args: {}".format(kwargs))  # useful for debug
        client.services.create(image_name, **kwargs)  # noqa
    else:
        # volumes = {  # Enable if necessary
        #     "/usr/local/share/jupyter/kernels": {
        #         "bind": "/usr/local/share/jupyter/kernels",
        #         "mode": "ro",
        #     }
        # }

        # finish args setup
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        kwargs["group_add"] = [group, "100"]
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['volumes'] = volumes   # Enable if necessary
        # print("container args: {}".format(kwargs))  # useful for debug
        client.containers.run(image_name, **kwargs)  # noqa
Example #10
    def _executer_service(self,
                          nom_application: str,
                          configuration_commande: dict,
                          commande: str,
                          image: dict = None):
        configuration_contexte = self.__service_monitor.connexion_middleware.configuration

        docker_secrets_requis = [
            ('pki.monitor.key', 'pki.monitor.key'),
        ]

        # Add mappings for the secrets to the configuration
        try:
            secrets_recents = configuration_commande['secrets']
            secrets_recents = [(s['name'], s['filename'])
                               for s in secrets_recents]
            docker_secrets_requis.extend(secrets_recents)
        except KeyError:
            pass

        docker_config_requis = [
            ('pki.millegrille.cert', 'pki.millegrille.cert'),
            ('pki.monitor.cert', 'pki.monitor.cert'),
            ('app.cfg.' + nom_application, 'app.cfg.json'),
        ]

        # Identify the required secrets and configs
        secrets = list()
        for nom_secret in docker_secrets_requis:
            secret = self.__service_monitor.gestionnaire_docker.trouver_secret(
                nom_secret[0])

            secret_reference = dict()
            secret_reference['secret_id'] = secret['secret_id']
            secret_reference['secret_name'] = secret['secret_name']
            secret_reference['filename'] = '/run/secrets/' + nom_secret[1]
            secret_reference['uid'] = 0
            secret_reference['gid'] = 0
            secret_reference['mode'] = 0o444

            secrets.append(SecretReference(**secret_reference))

        configs = list()
        for nom_config in docker_config_requis:
            config = self.__service_monitor.gestionnaire_docker.charger_config_recente(
                nom_config[0])

            config_reference = config['config_reference']
            config_reference['filename'] = '/run/secrets/' + nom_config[1]
            config_reference['uid'] = 0
            config_reference['gid'] = 0
            config_reference['mode'] = 0o444
            configs.append(ConfigReference(**config_reference))

        var_env = [
            "MG_MQ_HOST=" + configuration_contexte.mq_host,
            "MG_MQ_PORT=%d" % configuration_contexte.mq_port, "MG_MQ_SSL=on",
            "MG_MQ_AUTH_CERT=on",
            "MG_MQ_CA_CERTS=/run/secrets/pki.millegrille.cert",
            "MG_MQ_KEYFILE=/run/secrets/pki.monitor.key",
            "MG_MQ_CERTFILE=/run/secrets/pki.monitor.cert",
            "CONFIG_APP=/run/secrets/app.cfg.json"
        ]

        # Add the implicit scripts and backup volumes
        mounts = [
            'backup_%s:/backup:rw' % nom_application,
            'scripts_%s:/scripts:rw' % nom_application,
        ]
        try:
            volumes = configuration_commande['data']['volumes']
        except KeyError:
            pass
        else:
            for volume in volumes:
                mounts.append(':'.join([volume, '/backup/' + volume, 'rw']))

        docker_client = self.__gestionnaire_modules_docker.docker_client

        # Fetch the docker image used to run the script
        gestionnaire_images = GestionnaireImagesServices(
            configuration_contexte.idmg, docker_client)
        try:
            nom_image = image['image']
            tag = image['version']
        except (TypeError, KeyError):
            image_python = gestionnaire_images.telecharger_image_docker(
                'mg-python')
        else:
            image_python = gestionnaire_images.get_image(nom_image, tag)

        try:
            service = docker_client.services.create(
                image_python.id,
                name="script_application",
                command=commande,
                mounts=mounts,
                env=var_env,
                configs=configs,
                secrets=secrets,
                user="******",
                networks=['millegrille_net'],
                restart_policy=RestartPolicy(condition='none', max_attempts=0),
                constraints=configuration_commande.get('constraints'),
                workdir="/scripts")

            self.__wait_container_event.clear()
            self.__wait_start_service_name = service.name

            # Give the service 10 seconds to start. The image already exists locally, so there is no prep to do.
            self.__wait_container_event.wait(10)

            if self.__wait_container_event.is_set() is False:
                raise ExceptionExecution(
                    "Error starting the application script service for " +
                    nom_application,
                    resultat=None)

            self.__wait_die_service_container_id = service.id
            self.__wait_event_die.clear()

            self.__wait_event_die.wait(
                600)  # Allow at most 10 minutes for the backup

            # Check whether the task is still running or has failed
            service.reload()
            task = service.tasks()[0]
            if task['Status']['State'] == 'failed':
                exit_code = 'N/A'
                try:
                    exit_code = task['Status']['ContainerStatus']['ExitCode']
                except KeyError:
                    pass
                raise ExceptionExecution("Echec d'execution du script : " +
                                         str(exit_code),
                                         resultat=exit_code)

        finally:
            service = self.__gestionnaire_modules_docker.get_service(
                'script_application')
            service.remove()
Example #11
def gen_director_service_params(  # pylint: disable=unused-argument
        client: DockerClient, site_id: int,
        site_data: Dict[str, Any]) -> Dict[str, Any]:
    extra_env = {
        "PORT": "80",
        "HOST": "0.0.0.0",
    }

    params = gen_director_shared_params(client, site_id, site_data)

    env = params.pop("env", [])
    env.extend("{}={}".format(name, val) for name, val in extra_env.items())

    # We do the run.sh path detection in the shell so that it can adapt to the path changing without
    # updating the Docker service
    # The killing of the child process is based off of
    # https://unix.stackexchange.com/a/146770/306760
    shell_command = """date +'DIRECTOR: Starting server at %Y-%m-%d %H:%M:%S %Z'
# See docs/UMASK.md before touching this
umask "$1"
for path in /site/run.sh /site/private/run.sh /site/public/run.sh; do
    if [ -x "$path" ]; then
        term() {
            date +'DIRECTOR: Stopping server at %Y-%m-%d %H:%M:%S %Z'
            kill "$child"
        }
        trap term TERM

        "$path" &
        child="$!"

        while ! wait; do true; done
        exec date +'DIRECTOR: Stopped server at %Y-%m-%d %H:%M:%S %Z'
    fi
done
echo 'DIRECTOR: No run.sh file found -- if it exists, make sure it is set as executable'
exec sleep 2147483647"""

    params.update({
        "name": get_director_service_name(site_id),
        "read_only": True,
        # See docs/UMASK.md before touching this
        "command": ["sh", "-c", shell_command, "sh", oct(settings.SITE_UMASK)[2:]],
        "workdir": "/site/public",
        "networks": ["director-sites"],
        "resources": Resources(
            # 0.1 CPUs, 100M or so of memory
            cpu_limit=convert_cpu_limit(site_data["resource_limits"]["cpus"]),
            mem_limit=convert_memory_limit(site_data["resource_limits"]["mem_limit"]),
        ),
        "env": env,
        "log_driver": "json-file",
        "log_driver_options": {
            # Keep minimal logs
            "max-size": "500k",
            "max-file": "1",
        },
        "hosts": params.pop("extra_hosts"),
        "stop_grace_period": 3,
        "endpoint_spec": EndpointSpec(mode="vip", ports={}),
        "mode": ServiceMode(mode="replicated",
                            replicas=1 if site_data["is_being_served"] else 0),
        "restart_policy": RestartPolicy(condition="any", delay=5, max_attempts=5, window=0),
        "update_config": UpdateConfig(
            parallelism=1,
            order="stop-first",
            failure_action="rollback",
            max_failure_ratio=0,
            # delay and monitor are in nanoseconds (1e9 seconds)
            delay=int(5 * (10**9)),
            monitor=int(5 * (10**9)),
        ),
    })

    return params
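The returned dict is shaped for docker-py's services.create. A hedged sketch of a call site; the image name and site_data contents are assumptions inferred from the lookups above, and gen_director_shared_params plus the converters come from this example's project:

from docker import DockerClient

# Hypothetical inputs.
client = DockerClient.from_env()
site_data = {"is_being_served": True,
             "resource_limits": {"cpus": 0.1, "mem_limit": "100MB"}}
params = gen_director_service_params(client, site_id=42, site_data=site_data)
client.services.create("director-site-image", **params)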
Example #12
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get('EG_DOCKER_NETWORK', 'bridge')

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['EG_RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop(
        'PATH'
    )  # Let the image PATH be used.  Since this is relative to images, we're probably safe.

    user = param_env.get('KERNEL_UID')
    group = param_env.get('KERNEL_GID')

    # setup common args
    kwargs = dict()
    kwargs['name'] = container_name
    kwargs['user'] = user
    kwargs['labels'] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        print("Started Jupyter kernel in swarm-mode")
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append(
            "/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro"
        )
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')

        # finish args setup
        kwargs['env'] = param_env
        kwargs['endpoint_spec'] = endpoint_spec
        kwargs['restart_policy'] = restart_policy
        kwargs['container_labels'] = labels
        kwargs['networks'] = networks
        kwargs['groups'] = [group, '100']
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['workdir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['mounts'] = mounts   # Enable if necessary
        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Started Jupyter kernel in normal docker mode")

        # Note: seems to me that the kernels don't need to be mounted on a container that runs a single kernel

        # mount the kernel working directory from EG to kernel container
        # TODO: mount pipeline directory

        # finish args setup
        kwargs['hostname'] = container_name
        kwargs['environment'] = param_env
        kwargs['remove'] = remove_container
        kwargs['network'] = docker_network
        kwargs['group_add'] = [
            group, '100'
        ]  # NOTE: "group_add" for newer versions of docker
        kwargs['detach'] = True
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['working_dir'] = param_env.get('KERNEL_WORKING_DIR')

        pipeline_dir_mount = Mount(target=param_env.get('KERNEL_WORKING_DIR'),
                                   source=param_env.get('HOST_PIPELINE_DIR'),
                                   type='bind')

        mounts = [pipeline_dir_mount]

        # dynamically mount host-dir sources
        dynamic_mounts = get_dynamic_mounts(param_env)

        mounts = mounts + dynamic_mounts

        # print("container args: {}".format(kwargs))  # useful for debug
        kernel_container = client.containers.run(image_name,
                                                 mounts=mounts,
                                                 **kwargs)
Example #13
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["EG_RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop(
        "PATH"
    )  # Let the image PATH be used.  Since this is relative to images, we're probably safe.

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # setup common args
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["user"] = user
    kwargs["labels"] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        print("Started Jupyter kernel in swarm-mode")
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append(
            "/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro"
        )
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # finish args setup
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts   # Enable if necessary
        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Started Jupyter kernel in normal docker mode")

        # Note: seems to me that the kernels don't need to be mounted on a container that runs a single kernel

        # mount the kernel working directory from EG to kernel container

        # finish args setup
        kwargs["hostname"] = container_name
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        kwargs["group_add"] = [
            group,
            "100",
        ]  # NOTE: "group_add" for newer versions of docker
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")

        # print("container args: {}".format(kwargs))  # useful for debug
        orchest_mounts = get_orchest_mounts(
            project_dir=param_env.get("KERNEL_WORKING_DIR"),
            host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
        )
        orchest_mounts += [
            get_volume_mount(
                param_env.get("ORCHEST_PIPELINE_UUID"),
                param_env.get("ORCHEST_PROJECT_UUID"),
            )
        ]

        # Extract environment_uuid from the image name (last 36 characters)
        extracted_environment_uuid = image_name[-36:]

        device_requests = get_device_requests(
            extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID")
        )

        kernel_container = client.containers.run(
            image_name, mounts=orchest_mounts, device_requests=device_requests, **kwargs
        )
Example #14
    def test_restart_service_retains_settings(self):
        from docker.types import EndpointSpec, Resources, RestartPolicy, SecretReference, UpdateConfig

        join_command = self.init_swarm()

        with self.with_dind_container() as second_dind:
            self.prepare_images('alpine', client=self.dind_client(second_dind))

            second_dind.exec_run(join_command)

            network = self.remote_client.networks.create('pygen-net',
                                                         driver='overlay')

            secret = self.remote_client.secrets.create(name='pygen-secret',
                                                       data='TopSecret')

            secret.reload()

            service = self.remote_client.services.create(
                'alpine',
                name='target-svc',
                mode='global',
                command='sh -c "date +%s ; sleep 3600"',
                constraints=['node.hostname != non-existing-node'],
                container_labels={'container.label': 'testing'},
                dns_config={'Nameservers': ['8.8.8.8']},
                endpoint_spec=EndpointSpec(mode='vip', ports={14002: 1234}),
                env=['TEST_ENV_VAR=12345'],
                labels={'service.label': 'on-service'},
                mounts=['/tmp:/data/hosttmp:ro'],
                networks=[network.name],
                resources=Resources(mem_limit=24000000),
                restart_policy=RestartPolicy(condition='any',
                                             delay=5,
                                             max_attempts=3),
                secrets=[
                    SecretReference(secret_id=secret.id,
                                    secret_name=secret.name)
                ],
                stop_grace_period=1,
                update_config=UpdateConfig(parallelism=1,
                                           delay=1,
                                           monitor=7200000000),
                user='******',
                workdir='/data/hosttmp',
                tty=True)

            self.wait_for_service_start(service, num_tasks=2)

            service.reload()

            initial_spec = service.attrs['Spec']

            command = ['--template #ok', '--restart target-svc', '--one-shot']

            self.remote_client.containers.run(
                'pygen-build',
                command=' '.join(command),
                remove=True,
                volumes=['/var/run/docker.sock:/var/run/docker.sock:ro'])

            self.wait_for_service_start(service, num_tasks=4)

            service = self.remote_client.services.get(service.id)

            service.reload()

            newer_spec = service.attrs['Spec']

            del initial_spec['TaskTemplate']['ForceUpdate']
            del newer_spec['TaskTemplate']['ForceUpdate']

            initial_networks = initial_spec.pop(
                'Networks', initial_spec['TaskTemplate'].pop('Networks', []))
            newer_networks = newer_spec.pop(
                'Networks', newer_spec['TaskTemplate'].pop('Networks', []))

            self.maxDiff = None

            self.assertGreater(len(newer_networks), 0)
            self.assertEqual(newer_networks, initial_networks)
            self.assertDictEqual(newer_spec, initial_spec)
Example #15
def make_service(
    name,
    cmd,
    cmd_args,
    image,
    image_pull_policy='Always',
    image_pull_secret=None,
    port_list=None,
    env=None,
    networks=None,
    working_dir=None,
    configs=None,
    volumes=None,
    volume_mounts=None,
    labels=None,
    cpu_limit=None,
    cpu_guarantee=None,
    mem_limit=None,
    mem_guarantee=None,
    gpu_limit=None,
    gpu_guarantee=None,
):
    # Avoid mutable default arguments and None iterables.
    env = env or []
    networks = networks or []
    configs = configs or []
    volumes = volumes or []
    volume_mounts = volume_mounts or []
    labels = labels or {}

    args = (image, )
    kwargs = {}

    kwargs['name'] = name
    kwargs['command'] = cmd
    kwargs['args'] = cmd_args
    kwargs['container_labels'] = labels
    kwargs['env'] = ['{0}={1}'.format(i['name'], i['value']) for i in env]
    kwargs['labels'] = labels

    volume_d = {}
    config_d = {}
    volume_config_name_map = {}
    for _vol in volumes:
        if 'configMap' in _vol:
            config_prefix = _vol['configMap']['name']
            config_d[config_prefix] = {}
            volume_config_name_map[_vol['name']] = config_prefix
            for config_item in _vol['configMap']['items']:
                config_name = '{cfg_prefix}-{cfg_name}'.format(
                    cfg_prefix=config_prefix, cfg_name=config_item['key'])
                config_d[config_prefix][config_name] = {}
                config_d[config_prefix][config_name]['config_name'] = config_name
                config_d[config_prefix][config_name]['filename'] = config_item['path']
        else:
            volume_d[_vol['name']] = {}
            volume_d[_vol['name']]['vol'] = _vol

    for _mount in volume_mounts:
        if _mount['name'] in volume_config_name_map:
            for _, config_i in config_d[volume_config_name_map[
                    _mount['name']]].items():
                config_i['filename'] = os.path.join(_mount['mountPath'],
                                                    config_i['filename'])
        else:
            volume_d[_mount['name']]['mount'] = _mount

    # mounts
    # Mounts for the containers,
    # in the form source:target:options, where options is either ro or rw
    mounts = []
    for _, vol in volume_d.items():
        if 'hostPath' in vol['vol']:
            _src = vol['vol']['hostPath']['path']
            _target = vol['mount']['mountPath']
            _opt = 'ro' if vol['mount']['readOnly'] else 'rw'
            _mount_str = f"{_src}:{_target}:{_opt}"
            mounts.append(_mount_str)
    kwargs['mounts'] = mounts

    # config_refs
    # List of ConfigReference that will be exposed to the service.
    configs_name_map = {i.name: i for i in configs}
    config_refs = []
    for _, config_items in config_d.items():
        for _, config in config_items.items():
            config['config_id'] = configs_name_map[config['config_name']].id
            config_ref = ConfigReference(config['config_id'],
                                         config['config_name'],
                                         config['filename'])
            config_refs.append(config_ref)
    kwargs['configs'] = config_refs

    # resources
    # Resource limits and reservations.
    # resources = Resources(
    #                 cpu_limit=cpu_limit,
    #                 cpu_reservation=cpu_guarantee,
    #             )
    # kwargs['resources'] = resources

    kwargs['restart_policy'] = RestartPolicy()
    # TODO: replace hardcode network neuf-system
    kwargs['networks'] = networks
    return args, kwargs
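make_service returns an (args, kwargs) pair ready to unpack into docker-py's services.create. A hedged usage sketch; the image, network, and other inputs are illustrative:

import docker

# Hypothetical call: build the spec, then create the service from it.
client = docker.from_env()
args, kwargs = make_service(
    name='demo-svc',
    cmd=['python'],
    cmd_args=['-m', 'http.server'],
    image='python:3.11-slim',
    networks=['demo-net'],
)
service = client.services.create(*args, **kwargs)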