def run_service(service_name, image_name, command, restart_condition, networks, s_and_t, ports, mode='global', replicas=1):
    docker = MANAGER_CLIENR
    from dsp.image.valify import valify
    # if not valify(image_name):
    #     return "The image specified by the application could not be found!"

    # Collection of Mount objects
    mounts = []
    image_name = '192.168.123.251/' + image_name
    for s_t in s_and_t:
        read_only = s_t.get('read_only', '') == 'yes'
        m = Mount(source=s_t.get('source', ''), target=s_t.get('target', ''), read_only=read_only)
        mounts.append(m)
        print(s_t.get('source', ''))

    # containSpec = ContainerSpec(image=image_name, mounts=mounts, command=command, tty=True, open_stdin=True)
    containSpec = ContainerSpec(image=image_name, command=command, tty=True, open_stdin=True)

    # Declare the TaskTemplate ---> task
    # restart_condition ---> none, on-failure, or any
    restart_policy = RestartPolicy(condition=restart_condition)
    task = TaskTemplate(container_spec=containSpec, restart_policy=restart_policy)

    # Service launch mode
    if mode == "global":
        replicas = None
    service_mode = ServiceMode(mode=mode, replicas=replicas)

    # Endpoint, including load-balancing mode settings
    end_point = EndpointSpec(ports=ports)
    service_id = docker.create_service(task_template=task, name=service_name, mode=service_mode,
                                       networks=networks, endpoint_spec=end_point)
    return service_id
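# A minimal usage sketch of the helper above, with hypothetical values (the service
# name, network, mount paths, and ports are placeholders, not from the original code).
# It publishes node port 8080 onto container port 80 and bind-mounts /data read-only.
service_id = run_service(
    service_name='demo-web',                       # hypothetical service name
    image_name='demo/web:latest',                  # prefixed with the private registry inside run_service
    command=None,                                  # use the image's default command
    restart_condition='on-failure',                # none, on-failure, or any
    networks=['app_net'],                          # hypothetical overlay network
    s_and_t=[{'source': '/data', 'target': '/srv/data', 'read_only': 'yes'}],
    ports={8080: 80},                              # published port -> target port
    mode='replicated',
    replicas=2,
)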
def createService(self, service_name, image, ports, restart, cpu_limit, cpu_reservation, mem_reservation, mem_limit, volumes, environment, labels, command, networks):
    try:
        self.is_deploying = True
        resources = DockerResources(mem_limit=mem_limit, mem_reservation=mem_reservation,
                                    cpu_limit=cpu_limit, cpu_reservation=cpu_reservation)
        restart_policy = RestartPolicy(condition=restart['name'], delay=10,
                                       max_attempts=restart['MaximumRetryCount'])
        endpoints = EndpointSpec(mode='vip', ports=ports)

        self.docker_client.services.create(image=image, name=service_name, hostname=service_name,
                                           restart_policy=restart_policy, endpoint_spec=endpoints,
                                           resources=resources, mounts=volumes, env=environment,
                                           labels=labels, command=command, networks=networks)
        logging.info("{0} component deployed".format(service_name))
        self.is_deploying = False
        return resources
    except Exception as e:
        print(e)
        self.is_deploying = False
        return None
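# A hedged sketch of how this method might be called; `deployer`, the image, ports,
# and resource figures are hypothetical. Note that `restart` is a dict carrying the
# restart condition under 'name' and the retry limit under 'MaximumRetryCount'.
resources = deployer.createService(
    service_name='api',
    image='registry.local/api:1.0',           # hypothetical image
    ports={8080: 80},                         # published -> target port
    restart={'name': 'on-failure', 'MaximumRetryCount': 3},
    cpu_limit=int(0.5 * 1e9),                 # docker-py Resources expects nano-CPUs
    cpu_reservation=int(0.25 * 1e9),
    mem_reservation=64 * 1024 * 1024,         # bytes
    mem_limit=128 * 1024 * 1024,
    volumes=['/srv/api/config:/etc/api:ro'],
    environment=['LOG_LEVEL=info'],
    labels={'app': 'api'},
    command=None,
    networks=['backend'],
)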
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get('EG_DOCKER_NETWORK', 'bridge')

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['EG_RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop('PATH')  # Let the image PATH be used. Since this is relative to images, we're probably safe.

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')

        kernel_service = client.services.create(image_name,
                                                name=container_name,
                                                endpoint_spec=endpoint_spec,
                                                restart_policy=restart_policy,
                                                env=param_env,
                                                container_labels=labels,
                                                labels=labels,
                                                # mounts=mounts,  # Enable if necessary
                                                networks=networks)
    else:
        volumes = {'/usr/local/share/jupyter/kernels': {'bind': '/usr/local/share/jupyter/kernels', 'mode': 'ro'}}

        kernel_container = client.containers.run(image_name,
                                                 name=container_name,
                                                 hostname=container_name,
                                                 environment=param_env,
                                                 labels=labels,
                                                 remove=remove_container,
                                                 network=docker_network,
                                                 # volumes=volumes,  # Enable if necessary
                                                 detach=True)
def createService(image, command, cpuRequirments, name, labels, selectedNodeId):
    client = docker.from_env()
    cpuRequirmentsInNanoSeconds = cpuRequirments * 1000000000

    client.services.create(
        image,
        command,
        constraints=["node.role == worker", "node.id == " + selectedNodeId],
        mode=ServiceMode("replicated", 1),
        restart_policy=RestartPolicy(condition='none'),
        resources=Resources(cpu_reservation=cpuRequirmentsInNanoSeconds),
        name=name,
        labels={"instance_name": labels.get("instance_name")},
        hostname=selectedNodeId
    )
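# A hedged usage sketch of the helper above; the image, command, and node ID are
# placeholders. Docker interprets the reservation as nano-CPUs (1 CPU = 1e9), which
# is what the * 1000000000 conversion inside the function produces.
createService(
    image='demo/worker:latest',          # hypothetical image
    command=['python', 'work.py'],       # hypothetical command
    cpuRequirments=2,                    # whole CPUs; converted to nano-CPUs inside
    name='pinned-worker',
    labels={'instance_name': 'worker-1'},
    selectedNodeId='abc123nodeid',       # hypothetical Swarm node ID
)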
def _dump_services(self, files, namereplace, spec, yaml_data):
    secrets = []
    for secret in spec['TaskTemplate']['ContainerSpec'].get('Secrets', []):
        secret_name = filename = secret['SecretName']
        secret_orig_name = namereplace.sub('', secret['SecretName'])
        while filename in files:
            filename = inc_name(filename)
        secrets.append({
            "source": filename,
            "target": secret['File']['Name'],
            "uid": secret['File']['UID'],
            "gid": secret['File']['GID'],
            "mode": secret['File']['Mode'],
        })
        yaml_data['secrets'][secret_orig_name] = {
            'file': filename,
            'name': secret_orig_name
        }

        try:
            remanent = self.docker.services.list(filters=dict(name='maiev_get_secret'))
            if remanent:
                remanent[0].remove()
        except docker.errors.APIError:
            pass

        s = self.docker.services.create(
            'bash',
            command=['cat', '/tmp/secret'],
            name='maiev_get_secret',
            restart_policy=RestartPolicy(RestartConditionTypesEnum.ON_FAILURE, 5, 1),
            secrets=[SecretReference(secret['SecretID'], secret_name, '/tmp/secret')]
        )
        time.sleep(0.1)
        cnt = 0
        while len(s.tasks({'desired-state': 'running'})) > 0:
            time.sleep(0.5)
            cnt += 1
            if cnt > 60:
                raise Exception(
                    "unable to retrieve secret %s. task did not start: %s" % (
                        secret_name, s.tasks({'desired-state': 'running'})
                    )
                )
        files[filename] = b"".join(s.logs(stdout=True, follow=False)).decode('utf-8')
        s.remove()
    return secrets
class DockerWrapper:
    DOCKER_RESTART_POLICY = RestartPolicy(condition='none')

    def __init__(self, client: docker.DockerClient, config: RunnerConfig, authenticator: AuthenticationFactory):
        self._client = client
        self._config = config
        self._authenticator = authenticator
        self._logger = LogManager(__name__)

    def start_task(self, job_id: str, image: str, task_name: str, task_args: Iterable[str]) -> int:
        self._logger.info('Starting task {tn} for job {ji}'.format(tn=task_name, ji=job_id))
        run_env = [
            'SWARMER_ADDRESS=http://{addr}:{port}/result/{ident}'.format(
                addr=self._config.host, port=self._config.port, ident=job_id),
            'TASK_NAME={task}'.format(task=task_name),
            'SWARMER_JOB_ID={ident}'.format(ident=job_id)
        ]
        if any(task_args):
            run_env += [
                'RUN_ARGS={args}'.format(args=','.join([str(a) for a in task_args]))
            ]
        svc = self._get_client().services.create(
            image,
            env=run_env,
            restart_policy=self.DOCKER_RESTART_POLICY,
            networks=[self._config.network],
            name='{id}-{name}'.format(id=job_id, name=task_name))
        return svc.id

    def remove_service(self, service_ids: Iterable[int]):
        for sid in service_ids:
            svc = self._client.services.get(sid)
            if svc:
                svc.remove()

    def _get_client(self):
        if self._authenticator and self._authenticator.any_require_login:
            self._authenticator.perform_logins(self._client)
        return self._client
def launch_docker_kernel(kernel_id, port_range, response_addr, public_key, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get('DOCKER_NETWORK', os.environ.get('EG_DOCKER_NETWORK', 'bridge'))

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['PORT_RANGE'] = port_range
    param_env['PUBLIC_KEY'] = public_key
    param_env['RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop('PATH')  # Let the image PATH be used. Since this is relative to images, we're probably safe.

    user = param_env.get('KERNEL_UID')
    group = param_env.get('KERNEL_GID')

    # setup common args
    kwargs = dict()
    kwargs['name'] = container_name
    kwargs['hostname'] = container_name
    kwargs['user'] = user
    kwargs['labels'] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')

        # finish args setup
        kwargs['env'] = param_env
        kwargs['endpoint_spec'] = endpoint_spec
        kwargs['restart_policy'] = restart_policy
        kwargs['container_labels'] = labels
        kwargs['networks'] = networks
        kwargs['groups'] = [group, '100']
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['workdir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['mounts'] = mounts  # Enable if necessary

        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        volumes = {
            '/usr/local/share/jupyter/kernels': {
                'bind': '/usr/local/share/jupyter/kernels',
                'mode': 'ro'
            }
        }

        # finish args setup
        kwargs['environment'] = param_env
        kwargs['remove'] = remove_container
        kwargs['network'] = docker_network
        kwargs['group_add'] = [group, '100']
        kwargs['detach'] = True
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['working_dir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['volumes'] = volumes  # Enable if necessary

        # print("container args: {}".format(kwargs))  # useful for debug
        kernel_container = client.containers.run(image_name, **kwargs)
c.CoursewareUserSpawner.group_resources = group_resources

admin_config = resource_config.get('admin')
if admin_config is not None:
    r = resources(admin_config)
    c.CoursewareUserSpawner.admin_resources = r

default_config = resource_config.get('default')
if default_config is not None:
    r = resources(default_config)
    c.CoursewareUserSpawner.default_resources = r

restart_max_attempts = int(os.environ.get('SPAWNER_RESTART_MAX_ATTEMPTS', '10'))

extra_task_spec = {
    'restart_policy': RestartPolicy(
        condition='any',
        delay=5000000000,
        max_attempts=restart_max_attempts
    )
}
if 'SPAWNER_CONSTRAINTS' in os.environ:
    placement_constraints = os.environ['SPAWNER_CONSTRAINTS']
    extra_task_spec.update({
        'placement': Placement(
            constraints=[x.strip() for x in placement_constraints.split(';')]
        )
    })
c.SwarmSpawner.extra_task_spec = extra_task_spec

if 'JUPYTERHUB_SINGLEUSER_APP' in os.environ:
    c.Spawner.environment = {
        'JUPYTERHUB_SINGLEUSER_APP': os.environ['JUPYTERHUB_SINGLEUSER_APP']
    }
def launch_docker_kernel(kernel_id, port_range, response_addr, public_key, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get("DOCKER_NETWORK", os.environ.get("EG_DOCKER_NETWORK", "bridge"))

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["PORT_RANGE"] = port_range
    param_env["PUBLIC_KEY"] = public_key
    param_env["RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and EG_CLIENT_ENVS)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop("PATH")  # Let the image PATH be used. Since this is relative to images, we're probably safe.

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # setup common args
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["hostname"] = container_name
    kwargs["user"] = user
    kwargs["labels"] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        networks = list()
        networks.append(docker_network)
        # mounts = list()  # Enable if necessary
        # mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # finish args setup
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts  # Enable if necessary

        # print("service args: {}".format(kwargs))  # useful for debug
        client.services.create(image_name, **kwargs)  # noqa
    else:
        # volumes = {  # Enable if necessary
        #     "/usr/local/share/jupyter/kernels": {
        #         "bind": "/usr/local/share/jupyter/kernels",
        #         "mode": "ro",
        #     }
        # }

        # finish args setup
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        kwargs["group_add"] = [group, "100"]
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['volumes'] = volumes  # Enable if necessary

        # print("container args: {}".format(kwargs))  # useful for debug
        client.containers.run(image_name, **kwargs)  # noqa
def _executer_service(self, nom_application: str, configuration_commande: dict, commande: str, image: dict = None):
    configuration_contexte = self.__service_monitor.connexion_middleware.configuration

    docker_secrets_requis = [
        ('pki.monitor.key', 'pki.monitor.key'),
    ]

    # Add a mapping for the secrets provided in the configuration
    try:
        secrets_recents = configuration_commande['secrets']
        secrets_recents = [(s['name'], s['filename']) for s in secrets_recents]
        docker_secrets_requis.extend(secrets_recents)
    except KeyError:
        pass

    docker_config_requis = [
        ('pki.millegrille.cert', 'pki.millegrille.cert'),
        ('pki.monitor.cert', 'pki.monitor.cert'),
        ('app.cfg.' + nom_application, 'app.cfg.json'),
    ]

    # Identify the secrets and configs
    secrets = list()
    for nom_secret in docker_secrets_requis:
        secret = self.__service_monitor.gestionnaire_docker.trouver_secret(nom_secret[0])
        secret_reference = dict()
        secret_reference['secret_id'] = secret['secret_id']
        secret_reference['secret_name'] = secret['secret_name']
        secret_reference['filename'] = '/run/secrets/' + nom_secret[1]
        secret_reference['uid'] = 0
        secret_reference['gid'] = 0
        secret_reference['mode'] = 0o444
        secrets.append(SecretReference(**secret_reference))

    configs = list()
    for nom_config in docker_config_requis:
        config = self.__service_monitor.gestionnaire_docker.charger_config_recente(nom_config[0])
        config_reference = config['config_reference']
        config_reference['filename'] = '/run/secrets/' + nom_config[1]
        config_reference['uid'] = 0
        config_reference['gid'] = 0
        config_reference['mode'] = 0o444
        configs.append(ConfigReference(**config_reference))

    var_env = [
        "MG_MQ_HOST=" + configuration_contexte.mq_host,
        "MG_MQ_PORT=%d" % configuration_contexte.mq_port,
        "MG_MQ_SSL=on",
        "MG_MQ_AUTH_CERT=on",
        "MG_MQ_CA_CERTS=/run/secrets/pki.millegrille.cert",
        "MG_MQ_KEYFILE=/run/secrets/pki.monitor.key",
        "MG_MQ_CERTFILE=/run/secrets/pki.monitor.cert",
        "CONFIG_APP=/run/secrets/app.cfg.json"
    ]

    # Add the implicit script and backup volumes
    mounts = [
        'backup_%s:/backup:rw' % nom_application,
        'scripts_%s:/scripts:rw' % nom_application,
    ]
    try:
        volumes = configuration_commande['data']['volumes']
    except KeyError:
        pass
    else:
        for volume in volumes:
            mounts.append(':'.join([volume, '/backup/' + volume, 'rw']))

    docker_client = self.__gestionnaire_modules_docker.docker_client

    # Fetch the Docker image used to run the script
    gestionnaire_images = GestionnaireImagesServices(configuration_contexte.idmg, docker_client)
    try:
        nom_image = image['image']
        tag = image['version']
    except (TypeError, KeyError):
        image_python = gestionnaire_images.telecharger_image_docker('mg-python')
    else:
        image_python = gestionnaire_images.get_image(nom_image, tag)

    try:
        service = docker_client.services.create(
            image_python.id,
            name="script_application",
            command=commande,
            mounts=mounts,
            env=var_env,
            configs=configs,
            secrets=secrets,
            user="******",
            networks=['millegrille_net'],
            restart_policy=RestartPolicy(condition='none', max_attempts=0),
            constraints=configuration_commande.get('constraints'),
            workdir="/scripts")

        self.__wait_container_event.clear()
        self.__wait_start_service_name = service.name

        # Allow 10 seconds for the service to start. The image already exists locally, so no preparation is needed.
        self.__wait_container_event.wait(10)
        if self.__wait_container_event.is_set() is False:
            raise ExceptionExecution(
                "Erreur demarrage service script application pour " + nom_application,
                resultat=None)

        self.__wait_die_service_container_id = service.id
        self.__wait_event_die.clear()
        self.__wait_event_die.wait(600)  # Allow at most 10 minutes for the backup

        # Check whether the task is still running or whether it failed
        service.reload()
        task = service.tasks()[0]
        if task['Status']['State'] == 'failed':
            exit_code = 'N/A'
            try:
                exit_code = task['Status']['ContainerStatus']['ExitCode']
            except KeyError:
                pass
            raise ExceptionExecution("Echec d'execution du script : " + str(exit_code), resultat=exit_code)
    finally:
        service = self.__gestionnaire_modules_docker.get_service('script_application')
        service.remove()
def gen_director_service_params(  # pylint: disable=unused-argument
        client: DockerClient, site_id: int, site_data: Dict[str, Any]) -> Dict[str, Any]:
    extra_env = {
        "PORT": "80",
        "HOST": "0.0.0.0",
    }

    params = gen_director_shared_params(client, site_id, site_data)

    env = params.pop("env", [])
    env.extend("{}={}".format(name, val) for name, val in extra_env.items())

    # We do the run.sh path detection in the shell so that it can adapt to the path changing without
    # updating the Docker service
    # The killing of the child process is based off of
    # https://unix.stackexchange.com/a/146770/306760
    shell_command = """date +'DIRECTOR: Starting server at %Y-%m-%d %H:%M:%S %Z'
# See docs/UMASK.md before touching this
umask "$1"
for path in /site/run.sh /site/private/run.sh /site/public/run.sh; do
    if [ -x "$path" ]; then
        term() {
            date +'DIRECTOR: Stopping server at %Y-%m-%d %H:%M:%S %Z'
            kill "$child"
        }
        trap term TERM
        "$path" &
        child="$!"
        while ! wait; do true; done
        exec date +'DIRECTOR: Stopped server at %Y-%m-%d %H:%M:%S %Z'
    fi
done
echo 'DIRECTOR: No run.sh file found -- if it exists, make sure it is set as executable'
exec sleep 2147483647"""

    params.update({
        "name": get_director_service_name(site_id),
        "read_only": True,
        # See docs/UMASK.md before touching this
        "command": ["sh", "-c", shell_command, "sh", oct(settings.SITE_UMASK)[2:]],
        "workdir": "/site/public",
        "networks": ["director-sites"],
        "resources": Resources(
            # 0.1 CPUs, 100M or so of memory
            cpu_limit=convert_cpu_limit(site_data["resource_limits"]["cpus"]),
            mem_limit=convert_memory_limit(site_data["resource_limits"]["mem_limit"]),
        ),
        "env": env,
        "log_driver": "json-file",
        "log_driver_options": {
            # Keep minimal logs
            "max-size": "500k",
            "max-file": "1",
        },
        "hosts": params.pop("extra_hosts"),
        "stop_grace_period": 3,
        "endpoint_spec": EndpointSpec(mode="vip", ports={}),
        "mode": ServiceMode(mode="replicated", replicas=1 if site_data["is_being_served"] else 0),
        "restart_policy": RestartPolicy(condition="any", delay=5, max_attempts=5, window=0),
        "update_config": UpdateConfig(
            parallelism=1,
            order="stop-first",
            failure_action="rollback",
            max_failure_ratio=0,
            # delay and monitor are in nanoseconds (1e-9 seconds)
            delay=int(5 * (10**9)),
            monitor=int(5 * (10**9)),
        ),
    })

    return params
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get('KERNEL_IMAGE', None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get('KERNEL_USERNAME', '') + '-' + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get('EG_DOCKER_NETWORK', 'bridge')

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels['kernel_id'] = kernel_id
    labels['component'] = 'kernel'
    labels['app'] = 'enterprise-gateway'

    # Capture env parameters...
    param_env = dict()
    param_env['EG_RESPONSE_ADDRESS'] = response_addr
    param_env['KERNEL_SPARK_CONTEXT_INIT_MODE'] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop('PATH')  # Let the image PATH be used. Since this is relative to images, we're probably safe.

    user = param_env.get('KERNEL_UID')
    group = param_env.get('KERNEL_GID')

    # setup common args
    kwargs = dict()
    kwargs['name'] = container_name
    kwargs['user'] = user
    kwargs['labels'] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        print("Started Jupyter kernel in swarm-mode")
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode='dnsrr')
        restart_policy = RestartPolicy(condition='none')

        # finish args setup
        kwargs['env'] = param_env
        kwargs['endpoint_spec'] = endpoint_spec
        kwargs['restart_policy'] = restart_policy
        kwargs['container_labels'] = labels
        kwargs['networks'] = networks
        kwargs['groups'] = [group, '100']
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['workdir'] = param_env.get('KERNEL_WORKING_DIR')
        # kwargs['mounts'] = mounts  # Enable if necessary

        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Started Jupyter kernel in normal docker mode")

        # Note: seems to me that the kernels don't need to be mounted on a container that runs a single kernel
        # mount the kernel working directory from EG to kernel container
        # TODO: mount pipeline directory

        # finish args setup
        kwargs['hostname'] = container_name
        kwargs['environment'] = param_env
        kwargs['remove'] = remove_container
        kwargs['network'] = docker_network
        kwargs['group_add'] = [group, '100']  # NOTE: "group_add" for newer versions of docker
        kwargs['detach'] = True
        if param_env.get('KERNEL_WORKING_DIR'):
            kwargs['working_dir'] = param_env.get('KERNEL_WORKING_DIR')

        pipeline_dir_mount = Mount(target=param_env.get('KERNEL_WORKING_DIR'),
                                   source=param_env.get('HOST_PIPELINE_DIR'),
                                   type='bind')
        mounts = [pipeline_dir_mount]

        # dynamically mount host-dir sources
        dynamic_mounts = get_dynamic_mounts(param_env)
        mounts = mounts + dynamic_mounts

        # print("container args: {}".format(kwargs))  # useful for debug
        kernel_container = client.containers.run(image_name, mounts=mounts, **kwargs)
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit("ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!")

    # Container name is composed of KERNEL_USERNAME and KERNEL_ID
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine network. If EG_DOCKER_NETWORK has not been propagated, fall back to 'bridge'...
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these will be modelled similar to kubernetes: kernel_id, component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["EG_RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per env stanza of kernelspec, KERNEL_ and ENV_WHITELIST)
    # just add the env here.
    param_env.update(os.environ)
    param_env.pop("PATH")  # Let the image PATH be used. Since this is relative to images, we're probably safe.

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # setup common args
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["user"] = user
    kwargs["labels"] = labels

    client = DockerClient.from_env()
    if swarm_mode:
        print("Started Jupyter kernel in swarm-mode")
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append("/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro")
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # finish args setup
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts  # Enable if necessary

        # print("service args: {}".format(kwargs))  # useful for debug
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Started Jupyter kernel in normal docker mode")

        # Note: seems to me that the kernels don't need to be mounted on a container that runs a single kernel
        # mount the kernel working directory from EG to kernel container

        # finish args setup
        kwargs["hostname"] = container_name
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        kwargs["group_add"] = [group, "100"]  # NOTE: "group_add" for newer versions of docker
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")

        # print("container args: {}".format(kwargs))  # useful for debug
        orchest_mounts = get_orchest_mounts(
            project_dir=param_env.get("KERNEL_WORKING_DIR"),
            host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
        )
        orchest_mounts += [
            get_volume_mount(
                param_env.get("ORCHEST_PIPELINE_UUID"),
                param_env.get("ORCHEST_PROJECT_UUID"),
            )
        ]

        # Extract environment_uuid from the image name (last 36 characters)
        extracted_environment_uuid = image_name[-36:]

        device_requests = get_device_requests(
            extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID")
        )

        kernel_container = client.containers.run(
            image_name, mounts=orchest_mounts, device_requests=device_requests, **kwargs
        )
def test_restart_service_retains_settings(self):
    from docker.types import EndpointSpec, Resources, RestartPolicy, SecretReference, UpdateConfig

    join_command = self.init_swarm()

    with self.with_dind_container() as second_dind:
        self.prepare_images('alpine', client=self.dind_client(second_dind))
        second_dind.exec_run(join_command)

        network = self.remote_client.networks.create('pygen-net', driver='overlay')

        secret = self.remote_client.secrets.create(name='pygen-secret', data='TopSecret')
        secret.reload()

        service = self.remote_client.services.create(
            'alpine',
            name='target-svc',
            mode='global',
            command='sh -c "date +%s ; sleep 3600"',
            constraints=['node.hostname != non-existing-node'],
            container_labels={'container.label': 'testing'},
            dns_config={'Nameservers': ['8.8.8.8']},
            endpoint_spec=EndpointSpec(mode='vip', ports={14002: 1234}),
            env=['TEST_ENV_VAR=12345'],
            labels={'service.label': 'on-service'},
            mounts=['/tmp:/data/hosttmp:ro'],
            networks=[network.name],
            resources=Resources(mem_limit=24000000),
            restart_policy=RestartPolicy(condition='any', delay=5, max_attempts=3),
            secrets=[SecretReference(secret_id=secret.id, secret_name=secret.name)],
            stop_grace_period=1,
            update_config=UpdateConfig(parallelism=1, delay=1, monitor=7200000000),
            user='******',
            workdir='/data/hosttmp',
            tty=True)

        self.wait_for_service_start(service, num_tasks=2)

        service.reload()

        initial_spec = service.attrs['Spec']

        command = ['--template #ok', '--restart target-svc', '--one-shot']

        self.remote_client.containers.run(
            'pygen-build',
            command=' '.join(command),
            remove=True,
            volumes=['/var/run/docker.sock:/var/run/docker.sock:ro'])

        self.wait_for_service_start(service, num_tasks=4)

        service = self.remote_client.services.get(service.id)
        service.reload()

        newer_spec = service.attrs['Spec']

        del initial_spec['TaskTemplate']['ForceUpdate']
        del newer_spec['TaskTemplate']['ForceUpdate']

        initial_networks = initial_spec.pop('Networks', initial_spec['TaskTemplate'].pop('Networks', []))
        newer_networks = newer_spec.pop('Networks', newer_spec['TaskTemplate'].pop('Networks', []))

        self.maxDiff = None

        self.assertGreater(len(newer_networks), 0)
        self.assertEqual(newer_networks, initial_networks)
        self.assertDictEqual(newer_spec, initial_spec)
def make_service(
    name,
    cmd,
    cmd_args,
    image,
    image_pull_policy='Always',
    image_pull_secret=None,
    port_list=None,
    env=[],
    networks=[],
    working_dir=None,
    configs=None,
    volumes=None,
    volume_mounts=None,
    labels={},
    cpu_limit=None,
    cpu_guarantee=None,
    mem_limit=None,
    mem_guarantee=None,
    gpu_limit=None,
    gpu_guarantee=None,
):
    args = (image, )

    kwargs = {}
    kwargs['name'] = name
    kwargs['command'] = cmd
    kwargs['args'] = cmd_args
    kwargs['container_labels'] = labels
    kwargs['env'] = ['{0}={1}'.format(i['name'], i['value']) for i in env] if env else []
    kwargs['labels'] = labels

    volume_d = {}
    config_d = {}
    volume_config_name_map = {}
    for _vol in volumes:
        if 'configMap' in _vol:
            config_prefix = _vol['configMap']['name']
            config_d[config_prefix] = {}
            volume_config_name_map[_vol['name']] = config_prefix
            for config_item in _vol['configMap']['items']:
                config_name = '{cfg_prefix}-{cfg_name}'.format(
                    cfg_prefix=config_prefix, cfg_name=config_item['key'])
                config_d[config_prefix][config_name] = {}
                config_d[config_prefix][config_name]['config_name'] = config_name
                config_d[config_prefix][config_name]['filename'] = config_item['path']
        else:
            volume_d[_vol['name']] = {}
            volume_d[_vol['name']]['vol'] = _vol

    for _mount in volume_mounts:
        if _mount['name'] in volume_config_name_map:
            for _, config_i in config_d[volume_config_name_map[_mount['name']]].items():
                config_i['filename'] = os.path.join(_mount['mountPath'], config_i['filename'])
        else:
            volume_d[_mount['name']]['mount'] = _mount

    # mounts
    # Mounts for the containers,
    # in the form source:target:options, where options is either ro or rw
    mounts = []
    for _, vol in volume_d.items():
        if 'hostPath' in vol['vol']:
            _src = vol['vol']['hostPath']['path']
            _target = vol['mount']['mountPath']
            _opt = 'ro' if vol['mount']['readOnly'] else 'rw'
            _mount_str = f"{_src}:{_target}:{_opt}"
            mounts.append(_mount_str)
    kwargs['mounts'] = mounts

    # config_refs
    # List of ConfigReference that will be exposed to the service.
    configs_name_map = {i.name: i for i in configs}
    config_refs = []
    for _, config_items in config_d.items():
        for _, config in config_items.items():
            config['config_id'] = configs_name_map[config['config_name']].id
            config_ref = ConfigReference(config['config_id'], config['config_name'], config['filename'])
            config_refs.append(config_ref)
    kwargs['configs'] = config_refs

    # resources
    # Resource limits and reservations.
    # resources = Resources(
    #     cpu_limit=cpu_limit,
    #     cpu_reservation=cpu_guarantee,
    # )
    # kwargs['resources'] = resources

    kwargs['restart_policy'] = RestartPolicy()

    # TODO: replace hardcode network neuf-system
    kwargs['networks'] = networks

    return args, kwargs
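# A hedged sketch of how the returned (args, kwargs) pair might be consumed with a
# docker-py client; the image, names, and the empty volume/config inputs are
# assumptions for illustration, not part of the original code.
import docker

client = docker.from_env()

args, kwargs = make_service(
    name='demo-svc',
    cmd='python',
    cmd_args=['-m', 'http.server', '8000'],   # hypothetical command arguments
    image='python:3.11-slim',                 # hypothetical image
    env=[{'name': 'MODE', 'value': 'prod'}],
    networks=['neuf-system'],
    configs=[],          # docker Config objects, if any
    volumes=[],          # pod-style volume definitions
    volume_mounts=[],
    labels={'app': 'demo'},
)

# The kwargs map directly onto docker-py's services.create keyword arguments
# (command, args, env, mounts, configs, restart_policy, networks, ...).
service = client.services.create(*args, **kwargs)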