def swarm_resources(self):
    """Translate the configured ``resources`` dict into a docker ``Resources`` spec.

    Returns ``None`` when no resources were configured.  CPU quantities are
    scaled by ``self._CPU_RATE`` and truncated to int (docker expects
    nano-CPUs), memory by ``self._MEM_RATE``.  When ``enable_gpu`` is truthy
    a single generic ``gpu`` resource is requested as well.

    Raises:
        KeyError: if ``limits``/``requests`` or their ``cpu``/``memory``
            entries are missing (same behavior as before).
    """
    if self.resources is None:
        return None
    # Build the keyword arguments once instead of duplicating the entire
    # Resources(...) call in both branches of the GPU check.
    kwargs = dict(
        cpu_limit=int(self.resources['limits']['cpu'] * self._CPU_RATE),
        mem_limit=self.resources['limits']['memory'] * self._MEM_RATE,
        cpu_reservation=int(self.resources['requests']['cpu'] * self._CPU_RATE),
        mem_reservation=self.resources['requests']['memory'] * self._MEM_RATE,
    )
    if self.resources.get('enable_gpu', False):
        kwargs['generic_resources'] = {'gpu': 1}
    return Resources(**kwargs)
def swarm_resources(self):
    """Build a docker ``Resources`` spec from the optional ``resources`` dict.

    Missing (or falsy) entries become ``None`` and are stripped out by
    ``self.cleaner`` before being handed to ``Resources``.  Returns ``None``
    when no resources were configured at all.
    """
    if self.resources is None:
        return None

    limits = self.resources.get('limits', {})
    requests = self.resources.get('requests', {})

    def scaled_cpu(value):
        # Falsy (absent or zero) CPU values map to None so the cleaner drops them.
        return int(value * self._CPU_RATE) if value else None

    def scaled_mem(value):
        return value * self._MEM_RATE if value else None

    wants_gpu = self.resources.get('enable_gpu', False)
    spec = {
        'cpu_limit': scaled_cpu(limits.get('cpu')),
        'mem_limit': scaled_mem(limits.get('memory')),
        'cpu_reservation': scaled_cpu(requests.get('cpu')),
        'mem_reservation': scaled_mem(requests.get('memory')),
        'generic_resources': {'gpu': 1} if wants_gpu else None,
    }
    return Resources(**self.cleaner(spec))
def create_object(self):
    """Start the single-user server in a docker service.

    Builds the container, resource, placement, task and endpoint specs —
    each extensible through the corresponding ``extra_*`` attribute — and
    submits them via the ``create_service`` docker API call, whose response
    is returned.
    """
    container_kwargs = dict(
        image=self.image,
        env=self.get_env(),
        args=(yield self.get_command()),
        mounts=self.mounts,
    )
    container_kwargs.update(self.extra_container_spec)
    container_spec = ContainerSpec(**container_kwargs)
    # Ensure the host directory backing the notebook work dir exists.
    # exist_ok=True avoids the check-then-create race of the previous
    # os.path.exists() + os.makedirs() sequence.
    for mount in self.mounts:
        if mount['Target'] == '/home/jovyan/work':
            os.makedirs(mount['Source'], exist_ok=True)
    resources_kwargs = dict(
        mem_limit=self.mem_limit,
        mem_reservation=self.mem_guarantee,
        # Docker expects CPU quantities in nano-CPUs (1 CPU == 1e9).
        cpu_limit=int(self.cpu_limit * 1e9) if self.cpu_limit else None,
        cpu_reservation=int(self.cpu_guarantee * 1e9) if self.cpu_guarantee else None,
    )
    resources_kwargs.update(self.extra_resources_spec)
    resources_spec = Resources(**resources_kwargs)
    placement_kwargs = dict(
        constraints=None,
        preferences=None,
        platforms=None,
    )
    placement_kwargs.update(self.extra_placement_spec)
    placement_spec = Placement(**placement_kwargs)
    task_kwargs = dict(
        container_spec=container_spec,
        resources=resources_spec,
        networks=[self.network_name] if self.network_name else [],
        placement=placement_spec,
    )
    task_kwargs.update(self.extra_task_spec)
    task_spec = TaskTemplate(**task_kwargs)
    endpoint_kwargs = {}
    if not self.use_internal_ip:
        endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
    endpoint_kwargs.update(self.extra_endpoint_spec)
    endpoint_spec = EndpointSpec(**endpoint_kwargs)
    create_kwargs = dict(task_template=task_spec,
                         endpoint_spec=endpoint_spec,
                         name=self.service_name)
    create_kwargs.update(self.extra_create_kwargs)
    return (yield self.docker("create_service", **create_kwargs))
def create_object(self):
    """Start the single-user server in a docker service.

    Builds container/resources/placement/task/endpoint specs (each
    extensible via the matching ``extra_*`` attribute), creates the
    service, waits one second, then returns the create_service response.
    """
    container_kwargs = dict(
        image=self.image,
        env=self.get_env(),
        args=(yield self.get_command()),
        mounts=self.mounts,
    )
    container_kwargs.update(self.extra_container_spec)
    container_spec = ContainerSpec(**container_kwargs)
    resources_kwargs = dict(
        mem_limit=self.mem_limit,
        mem_reservation=self.mem_guarantee,
        # Docker expects CPU quantities in nano-CPUs (1 CPU == 1e9).
        cpu_limit=int(self.cpu_limit * 1e9) if self.cpu_limit else None,
        cpu_reservation=int(self.cpu_guarantee * 1e9) if self.cpu_guarantee else None,
    )
    resources_kwargs.update(self.extra_resources_spec)
    resources_spec = Resources(**resources_kwargs)
    placement_kwargs = dict(
        constraints=None,
        preferences=None,
        platforms=None,
    )
    placement_kwargs.update(self.extra_placement_spec)
    placement_spec = Placement(**placement_kwargs)
    task_kwargs = dict(
        container_spec=container_spec,
        resources=resources_spec,
        networks=[self.network_name] if self.network_name else [],
        placement=placement_spec,
    )
    task_kwargs.update(self.extra_task_spec)
    task_spec = TaskTemplate(**task_kwargs)
    endpoint_kwargs = {}
    if not self.use_internal_ip:
        endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
    endpoint_kwargs.update(self.extra_endpoint_spec)
    endpoint_spec = EndpointSpec(**endpoint_kwargs)
    create_kwargs = dict(task_template=task_spec,
                         endpoint_spec=endpoint_spec,
                         name=self.service_name)
    create_kwargs.update(self.extra_create_kwargs)
    result = yield self.docker("create_service", **create_kwargs)
    # Chenglu added: inspect_service right after create_service may raise
    # a "service not found" error, so give the swarm a moment to register
    # the service before anyone inspects it.
    yield gen.sleep(1)
    self.log.debug("Docker >>> create_service with %s", json.dumps(create_kwargs))
    return result
def createService(image, command, cpuRequirments, name, labels, selectedNodeId):
    """Create a one-replica swarm service pinned to a specific worker node.

    Args:
        image: Image to run.
        command: Command for the service container.
        cpuRequirments: CPU reservation expressed in whole CPUs.
        name: Name for the new service.
        labels: Mapping expected to carry an ``instance_name`` entry.
        selectedNodeId: Swarm node id the service is constrained to; also
            used as the container hostname (presumably intentional — TODO
            confirm).

    Returns:
        The created ``Service`` object.  The previous version discarded the
        handle; returning it is backward compatible (callers ignoring the
        return value are unaffected).
    """
    client = docker.from_env()
    # Docker reserves CPU in nano-CPUs: 1 CPU == 1e9 units.  (The old local
    # was misnamed "...InNanoSeconds" — these are nano-CPUs, not time.)
    NANO_CPUS_PER_CPU = 1000000000
    return client.services.create(
        image,
        command,
        constraints=["node.role == worker", "node.id == " + selectedNodeId],
        mode=ServiceMode("replicated", 1),
        # Never restart finished/failed tasks.
        restart_policy=RestartPolicy(condition='none'),
        resources=Resources(cpu_reservation=cpuRequirments * NANO_CPUS_PER_CPU),
        name=name,
        labels={"instance_name": labels.get("instance_name")},
        hostname=selectedNodeId,
    )
def create_object(self):
    """Start the single-user server in a docker service.

    Assembles the container, resource, task and endpoint specifications,
    each of which may be extended through the matching ``extra_*``
    attribute, then submits them via ``create_service`` and returns the
    API response.
    """
    # Container: image, environment, startup command and mounts, plus any
    # caller-supplied overrides.
    container_opts = {
        'image': self.image,
        'env': self.get_env(),
        'args': (yield self.get_command()),
        'mounts': self.mounts,
    }
    container_opts.update(self.extra_container_spec)
    container = ContainerSpec(**container_opts)

    # Resources: docker wants CPU expressed in nano-CPUs, hence 1e9.
    resource_opts = {
        'mem_limit': self.mem_limit,
        'mem_reservation': self.mem_guarantee,
        'cpu_limit': int(self.cpu_limit * 1e9) if self.cpu_limit else None,
        'cpu_reservation': int(self.cpu_guarantee * 1e9) if self.cpu_guarantee else None,
    }
    resource_opts.update(self.extra_resources_spec)
    resources = Resources(**resource_opts)

    # Task template ties the container and resources to the network.
    task_opts = {
        'container_spec': container,
        'resources': resources,
        'networks': [self.network_name] if self.network_name else [],
    }
    task_opts.update(self.extra_task_spec)
    task_template = TaskTemplate(**task_opts)

    # Only publish a port when the hub has to reach us from outside.
    endpoint_opts = {}
    if not self.use_internal_ip:
        endpoint_opts["ports"] = {None: (self.port, "tcp")}
    endpoint_opts.update(self.extra_endpoint_spec)
    endpoint = EndpointSpec(**endpoint_opts)

    service_opts = {
        'task_template': task_template,
        'endpoint_spec': endpoint,
        'name': self.service_name,
    }
    service_opts.update(self.extra_create_kwargs)
    return (yield self.docker("create_service", **service_opts))
def create_object(self):
    """Start the single-user server in a docker service.

    After submitting ``create_service`` this polls (once per second) until
    the swarm has scheduled at least one task for the service, then returns
    the create_service response.
    """
    container_kwargs = dict(
        image=self.image,
        env=self.get_env(),
        args=(yield self.get_command()),
        mounts=self.mounts,
    )
    container_kwargs.update(self.extra_container_spec)
    container_spec = ContainerSpec(**container_kwargs)
    resources_kwargs = dict(
        mem_limit=self.mem_limit,
        mem_reservation=self.mem_guarantee,
        # Docker expects CPU quantities in nano-CPUs (1 CPU == 1e9).
        cpu_limit=int(self.cpu_limit * 1e9) if self.cpu_limit else None,
        cpu_reservation=int(
            self.cpu_guarantee * 1e9
        ) if self.cpu_guarantee else None,
    )
    resources_kwargs.update(self.extra_resources_spec)
    resources_spec = Resources(**resources_kwargs)
    placement_kwargs = dict(
        constraints=None,
        preferences=None,
        platforms=None,
    )
    placement_kwargs.update(self.extra_placement_spec)
    placement_spec = Placement(**placement_kwargs)
    task_kwargs = dict(
        container_spec=container_spec,
        resources=resources_spec,
        networks=[self.network_name] if self.network_name else [],
        placement=placement_spec,
    )
    task_kwargs.update(self.extra_task_spec)
    task_spec = TaskTemplate(**task_kwargs)
    endpoint_kwargs = {}
    if not self.use_internal_ip:
        endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
    endpoint_kwargs.update(self.extra_endpoint_spec)
    endpoint_spec = EndpointSpec(**endpoint_kwargs)
    create_kwargs = dict(
        task_template=task_spec, endpoint_spec=endpoint_spec, name=self.service_name
    )
    create_kwargs.update(self.extra_create_kwargs)
    service = yield self.docker("create_service", **create_kwargs)
    # NOTE(review): this loop polls forever if the service never gets a
    # task scheduled (e.g. unsatisfiable constraints) — consider a timeout.
    while True:
        tasks = yield self.docker(
            "tasks",
            filters={"service": self.service_name},
        )
        if len(tasks) > 0:
            break
        yield gen.sleep(1.0)
    return service
def start(self):
    """Start the single-user server in a docker service.

    You can specify the params for the service through jupyterhub_config.py
    or using the user_options
    """
    self.log.debug("User: {}, start spawn".format(self.user.__dict__))
    # https://github.com/jupyterhub/jupyterhub
    # /blob/master/jupyterhub/user.py#L202
    # By default jupyterhub calls the spawner passing user_options
    if self.use_user_options:
        user_options = self.user_options
    else:
        user_options = {}
    service = yield self.get_service()
    if service is None:
        # Validate state
        # NOTE(review): if 'container_spec' is missing/None AND user_options
        # is non-empty, container_spec is never bound and the .update()
        # below raises NameError — TODO confirm intended behavior.
        if hasattr(self, "container_spec") and self.container_spec is not None:
            container_spec = dict(**self.container_spec)
        elif user_options == {}:
            self.log.error("User: {} is trying to create a service"
                           " without a container_spec".format(self.user))
            raise Exception("That notebook is missing a specification"
                            "to launch it, contact the admin to resolve "
                            "this issue")
        # Setup service
        container_spec.update(user_options.get("container_spec", {}))
        # Which image to spawn
        if self.use_user_options and "user_selected_image" in user_options:
            self.log.debug(
                "User options received: {}".format(user_options))
            image_name = user_options["user_selected_name"]
            image_value = user_options["user_selected_image"]
            selected_image = None
            # Match on both the display name and the image reference.
            for di in self.images:
                if image_name == di["name"] and image_value == di["image"]:
                    selected_image = copy.deepcopy(di)
            if selected_image is None:
                err_msg = "User selected image: {} couldn't be found".format(
                    image_value)
                self.log.error(err_msg)
                raise Exception(err_msg)
            self.log.info(
                "Using the user selected image: {}".format(selected_image))
        else:
            # Default image
            selected_image = self.images[0]
            self.log.info(
                "Using the default image: {}".format(selected_image))
        self.log.debug("Image info: {}".format(selected_image))
        # Does that image have restricted access
        if "access" in selected_image:
            # Check for static or db users
            allowed = False
            if self.service_owner in selected_image["access"]:
                allowed = True
            else:
                # 'access' may instead be a path to a newline-separated
                # user database file.
                if os.path.exists(selected_image["access"]):
                    db_path = selected_image["access"]
                    try:
                        self.log.info("Checking db: {} for "
                                      "User: {}".format(
                                          db_path, self.service_owner))
                        with open(db_path, "r") as db:
                            users = [
                                user.rstrip("\n").rstrip("\r\n")
                                for user in db
                            ]
                            if self.service_owner in users:
                                allowed = True
                    except IOError as err:
                        self.log.error("User: {} tried to open db file {},"
                                       "Failed {}".format(
                                           self.service_owner, db_path, err))
            if not allowed:
                self.log.error(
                    "User: {} tried to launch {} without access".format(
                        self.service_owner, selected_image["image"]))
                raise Exception(
                    "You don't have permission to launch that image")
        self.log.debug("Container spec: {}".format(container_spec))
        # Assign the image name as a label
        container_spec["labels"] = {"image_name": selected_image["name"]}
        # Setup mounts
        mounts = []
        # Global mounts
        if "mounts" in container_spec:
            mounts.extend(container_spec["mounts"])
        container_spec["mounts"] = []
        # Image mounts
        if "mounts" in selected_image:
            mounts.extend(selected_image["mounts"])
        for mount in mounts:
            if isinstance(mount, dict):
                m = VolumeMounter(mount)
                m = yield m.create(owner=self.service_owner)
            else:
                # Expects a mount_class that supports 'create'
                if hasattr(self.user, "data"):
                    m = yield mount.create(self.user.data,
                                           owner=self.service_owner)
                else:
                    m = yield mount.create(owner=self.service_owner)
            container_spec["mounts"].append(m)
        # Some envs are required by the single-user-image
        if "env" in container_spec:
            container_spec["env"].update(self.get_env())
        else:
            container_spec["env"] = self.get_env()
        # Env of image
        if "env" in selected_image and isinstance(selected_image["env"],
                                                  dict):
            container_spec["env"].update(selected_image["env"])
        # Dynamic update of env values: "{attr}" placeholders are replaced
        # from (in order) the spawner, the user, and user.data — later
        # matches overwrite earlier ones.
        for env_key, env_value in container_spec["env"].items():
            stripped_value = env_value.lstrip("{").rstrip("}")
            if hasattr(self, stripped_value) and isinstance(
                    getattr(self, stripped_value), str):
                container_spec["env"][env_key] = getattr(
                    self, stripped_value)
            if hasattr(self.user, stripped_value) and isinstance(
                    getattr(self.user, stripped_value), str):
                container_spec["env"][env_key] = getattr(
                    self.user, stripped_value)
            if (hasattr(self.user, "data")
                    and hasattr(self.user.data, stripped_value)
                    and isinstance(getattr(self.user.data, stripped_value),
                                   str)):
                container_spec["env"][env_key] = getattr(
                    self.user.data, stripped_value)
        # Args of image
        if "args" in selected_image and isinstance(selected_image["args"],
                                                   list):
            container_spec.update({"args": selected_image["args"]})
        if ("command" in selected_image
                and isinstance(selected_image["command"], list)
                or "command" in selected_image
                and isinstance(selected_image["command"], str)):
            container_spec.update({"command": selected_image["command"]})
        # Log mounts config
        self.log.debug("User: {} container_spec mounts: {}".format(
            self.user, container_spec["mounts"]))
        # Global resource_spec
        resource_spec = {}
        if hasattr(self, "resource_spec"):
            resource_spec = self.resource_spec
        resource_spec.update(user_options.get("resource_spec", {}))
        # user_options override the spawner-level defaults below.
        networks = None
        if hasattr(self, "networks"):
            networks = self.networks
        if user_options.get("networks") is not None:
            networks = user_options.get("networks")
        # Global Log driver
        log_driver = None
        if hasattr(self, "log_driver"):
            log_driver = self.log_driver
        if user_options.get("log_driver") is not None:
            log_driver = user_options.get("log_driver")
        accelerators = []
        if hasattr(self, "accelerators"):
            accelerators = self.accelerators
        if user_options.get("accelerators") is not None:
            accelerators = user_options.get("accelerators")
        # Global placement
        placement = None
        if hasattr(self, "placement"):
            placement = self.placement
        if user_options.get("placement") is not None:
            placement = user_options.get("placement")
        # Image to spawn
        image = selected_image["image"]
        # Image resources (image settings override the globals above)
        if "resource_spec" in selected_image:
            resource_spec = selected_image["resource_spec"]
        # Accelerators attached to the image
        if "accelerators" in selected_image:
            accelerators = selected_image["accelerators"]
        # Placement of image
        if "placement" in selected_image:
            placement = selected_image["placement"]
        # Logdriver of image
        if "log_driver" in selected_image:
            log_driver = selected_image["log_driver"]
        # Configs attached to image
        if "configs" in selected_image and isinstance(
                selected_image["configs"], list):
            for c in selected_image["configs"]:
                if isinstance(c, dict):
                    self.configs.append(c)
        endpoint_spec = {}
        if "endpoint_spec" in selected_image:
            endpoint_spec = selected_image["endpoint_spec"]
        if self.configs:
            # Check that the supplied configs already exists
            current_configs = yield self.docker("configs")
            config_error_msg = (
                "The server has a misconfigured config, "
                "please contact an administrator to resolve this")
            for c in self.configs:
                if "config_name" not in c:
                    self.log.error("Config: {} does not have a "
                                   "required config_name key".format(c))
                    raise Exception(config_error_msg)
                if "config_id" not in c:
                    # Find the id from the supplied name
                    config_ids = [
                        cc["ID"] for cc in current_configs
                        if cc["Spec"]["Name"] == c["config_name"]
                    ]
                    if not config_ids:
                        # NOTE(review): the '{}' placeholder is never
                        # filled — missing .format(c["config_name"]).
                        self.log.error(
                            "A config with name {} could not be found")
                        raise Exception(config_error_msg)
                    c["config_id"] = config_ids[0]
            container_spec.update(
                {"configs": [ConfigReference(**c) for c in self.configs]})
        # Prepare the accelerators and attach it to the environment
        if accelerators:
            for accelerator in accelerators:
                # NOTE(review): 'aquire' (sic) is the accelerator API's
                # spelling — presumably reserves the device for this user.
                accelerator_id = accelerator.aquire(self.user.name)
                # NVIDIA_VISIBLE_DEVICES=0:0
                container_spec["env"][
                    "NVIDIA_VISIBLE_DEVICES"] = "{}".format(accelerator_id)
        # Global container user
        uid_gid = None
        if "uid_gid" in container_spec:
            uid_gid = copy.deepcopy(container_spec["uid_gid"])
            del container_spec["uid_gid"]
        # Image user
        if "uid_gid" in selected_image:
            uid_gid = selected_image["uid_gid"]
        self.log.info("gid info {}".format(uid_gid))
        # "{uid}"/"{gid}" placeholders are substituted from the user object.
        if isinstance(uid_gid, str):
            if ":" in uid_gid:
                uid, gid = uid_gid.split(":")
            else:
                uid, gid = uid_gid, None
            if (uid == "{uid}" and hasattr(self.user, "uid")
                    and self.user.uid is not None):
                uid = self.user.uid
            if (gid is not None and gid == "{gid}"
                    and hasattr(self.user, "gid")
                    and self.user.gid is not None):
                gid = self.user.gid
            if uid:
                container_spec.update({"user": str(uid)})
            if uid and gid:
                container_spec.update({"user": str(uid) + ":" + str(gid)})
        # Global container user
        if "user" in container_spec:
            container_spec["user"] = str(container_spec["user"])
        # Image user
        if "user" in selected_image:
            container_spec.update({"user": str(selected_image["user"])})
        dynamic_holders = [Spawner, self, self.user]
        if hasattr(self.user, "data"):
            dynamic_holders.append(self.user.data)
        # Expand container_spec before start: substitute "{name}" style
        # placeholders from each holder's attributes, best-effort.
        for construct in dynamic_holders:
            try:
                if not hasattr(construct, "__dict__"):
                    continue
                recursive_format(container_spec, construct.__dict__)
            except TypeError:
                pass
        # Log driver
        log_driver_name, log_driver_options = None, None
        if log_driver and isinstance(log_driver, dict):
            if "name" in log_driver:
                log_driver_name = log_driver["name"]
            if "options" in log_driver:
                log_driver_options = log_driver["options"]
        # Create the service
        container_spec = ContainerSpec(image, **container_spec)
        resources = Resources(**resource_spec)
        placement = Placement(**placement)
        task_log_driver = None
        if log_driver_name:
            task_log_driver = DriverConfig(log_driver_name,
                                           options=log_driver_options)
        task_spec = {
            "container_spec": container_spec,
            "resources": resources,
            "placement": placement,
        }
        if task_log_driver:
            task_spec.update({"log_driver": task_log_driver})
        task_tmpl = TaskTemplate(**task_spec)
        self.log.debug("task temp: {}".format(task_tmpl))
        # Set endpoint spec
        endpoint_spec = EndpointSpec(**endpoint_spec)
        resp = yield self.docker(
            "create_service",
            task_tmpl,
            name=self.service_name,
            networks=networks,
            endpoint_spec=endpoint_spec,
        )
        self.service_id = resp["ID"]
        self.log.info("Created Docker service {} (id: {}) from image {}"
                      " for user {}".format(self.service_name,
                                            self.service_id[:7], image,
                                            self.user))
        yield self.wait_for_running_tasks()
    else:
        self.log.info("Found existing Docker service '{}' (id: {})".format(
            self.service_name, self.service_id[:7]))
        # Handle re-using API token.
        # Get the API token from the environment variables
        # of the running service:
        envs = service["Spec"]["TaskTemplate"]["ContainerSpec"]["Env"]
        for line in envs:
            if line.startswith("JPY_API_TOKEN="):
                self.api_token = line.split("=", 1)[1]
                break
    ip = self.service_name
    port = self.service_port
    self.log.debug("Active service: '{}' with user '{}'".format(
        self.service_name, self.user))
    # we use service_name instead of ip
    # https://docs.docker.com/engine/swarm/networking/#use-swarm-mode-service-discovery
    # service_port is actually equal to 8888
    return ip, port
# Listen on a unix socket and keep two swarm services in sync with the
# messages received: a worker-task service that is started/stopped, and an
# assembly-coordinator service whose image tag tracks the message.
server = UnixServer(address)
client = docker.from_env()
# Best-effort cleanup of the services on interpreter exit.
atexit.register(removeservice, docker_client=client)
while (True):
    try:
        message_received = server.read_data()
        # The message is used directly as the coordinator image tag.
        image_name = "127.0.0.1:5000/r3_assembly_coordinator:{}".format(
            message_received)
        workertask_name = "127.0.0.1:5000/workertask9"
        if message_received == "no_cushion":
            # Tear the worker task down if it is running.
            workertask_service = client.services.list(
                filters={"name": "robot3_workertask9"})
            if len(workertask_service) != 0:
                workertask_service[0].remove()
        else:
            # Start the worker task unless it is already running.
            workertask_service = client.services.list(
                filters={"name": "robot3_workertask9"})
            if len(workertask_service) == 0:
                client.services.create(image=workertask_name,
                                       name="robot3_workertask9",
                                       constraints=["node.hostname==GOCASr3"],
                                       resources=Resources(mem_limit=52428800),
                                       mounts=["/tmp:/tmp:rw"],
                                       env=["PYTHONUNBUFFERED=1"])
        print(image_name)
        # Roll the coordinator service to the image tagged by the message.
        # NOTE(review): assumes the coordinator service exists; service[0]
        # raises IndexError otherwise — TODO confirm.
        service = client.services.list(
            filters={"name": "robot3_r3_assembly_coordinator"})
        service[0].update(image=image_name)
    except ConnectionResetError as e:
        # A dropped client connection is expected; log and keep serving.
        print(e)
def gen_director_service_params(  # pylint: disable=unused-argument
        client: DockerClient, site_id: int,
        site_data: Dict[str, Any]) -> Dict[str, Any]:
    """Build the kwargs for creating/updating a site's Director docker service.

    Starts from the shared params, appends service-specific environment
    variables, and wires in the launch shell script, resource limits,
    networking, logging and restart/update policies.
    """
    extra_env = {
        "PORT": "80",
        "HOST": "0.0.0.0",
    }

    params = gen_director_shared_params(client, site_id, site_data)

    env = params.pop("env", [])
    env.extend("{}={}".format(name, val) for name, val in extra_env.items())

    # We do the run.sh path detection in the shell so that it can adapt to the path changing without
    # updating the Docker service
    # The killing of the child process is based off of
    # https://unix.stackexchange.com/a/146770/306760
    shell_command = """date +'DIRECTOR: Starting server at %Y-%m-%d %H:%M:%S %Z'
# See docs/UMASK.md before touching this
umask "$1"
for path in /site/run.sh /site/private/run.sh /site/public/run.sh; do
    if [ -x "$path" ]; then
        term() {
            date +'DIRECTOR: Stopping server at %Y-%m-%d %H:%M:%S %Z'
            kill "$child"
        }
        trap term TERM
        "$path" &
        child="$!"
        while ! wait; do true; done
        exec date +'DIRECTOR: Stopped server at %Y-%m-%d %H:%M:%S %Z'
    fi
done
echo 'DIRECTOR: No run.sh file found -- if it exists, make sure it is set as executable'
exec sleep 2147483647"""

    params.update({
        "name": get_director_service_name(site_id),
        "read_only": True,
        # See docs/UMASK.md before touching this
        # $0 is "sh"; $1 is the umask passed as a positional argument.
        "command": ["sh", "-c", shell_command, "sh", oct(settings.SITE_UMASK)[2:]],
        "workdir": "/site/public",
        "networks": ["director-sites"],
        "resources": Resources(
            # 0.1 CPUs, 100M or so of memory
            cpu_limit=convert_cpu_limit(site_data["resource_limits"]["cpus"]),
            mem_limit=convert_memory_limit(
                site_data["resource_limits"]["mem_limit"]),
        ),
        "env": env,
        "log_driver": "json-file",
        "log_driver_options": {
            # Keep minimal logs
            "max-size": "500k",
            "max-file": "1",
        },
        "hosts": params.pop("extra_hosts"),
        "stop_grace_period": 3,
        "endpoint_spec": EndpointSpec(mode="vip", ports={}),
        # Zero replicas effectively parks the service when the site is not
        # being served.
        "mode": ServiceMode(mode="replicated",
                            replicas=1 if site_data["is_being_served"] else 0),
        "restart_policy": RestartPolicy(condition="any", delay=5,
                                        max_attempts=5, window=0),
        "update_config": UpdateConfig(
            parallelism=1,
            order="stop-first",
            failure_action="rollback",
            max_failure_ratio=0,
            # delay and monitor are in nanoseconds (1e9 seconds)
            delay=int(5 * (10**9)),
            monitor=int(5 * (10**9)),
        ),
    })

    return params
List: List of Services """ return client.services.list(filters={"label": [stack_name]}) # return sorted(client.services.list(filters={"label": [check_label]}), key=lambda x: get_created(x), reverse=True) def get_nodes(): """Returns a list of nodes in the swarm Returns: List: List of Nodes """ return client.nodes.list() resources = Resources(mem_limit='1.5g') def create_service(username): """Created a new server service Args: username (string): The username for the OP user Returns: Service: The mc server service """ # global port_last # vol = {check_label+"_user_"+username: {'bind': '/server', 'mode': 'rw'}} if username != None else False env = [f"OP_USERNAME={username}"
def test_restart_retains_settings(self):
    """Force-update a fully-configured service and verify every setting survives.

    Creates a service exercising (nearly) every creation option, snapshots
    it, forces a restart via ``update(force_update=...)``, and re-runs the
    same assertions on the new tasks.
    """
    test_network = self.create_network('pygen-swarm-test', driver='overlay')
    test_secret = self.create_secret('pygen-secret', 'TopSecret')
    test_service = self.start_service(
        image=os.environ.get('TEST_IMAGE', 'alpine'),
        command='sh',
        args=['-c', 'sleep 3600'],
        constraints=['node.role==manager'],
        container_labels={'pygen.container.label': 'label-on-container'},
        endpoint_spec={
            'Ports': [{
                'Protocol': 'tcp',
                'PublishedPort': 8080,
                'TargetPort': 5000
            }]
        },
        env=['PYGEN_CONTAINER_ENV=env-on-container'],
        hostname='pygen-swarm-test-512',
        labels={'pygen.service.label': 'label-on-service'},
        mode={'Replicated': {
            'Replicas': 2
        }},
        mounts=['/var:/hostvar:ro'],
        networks=[test_network.id],
        resources=Resources(mem_limit=8128128),
        restart_policy=dict(condition='on-failure', delay=3),
        secrets=[
            SecretReference(secret_id=test_secret.id,
                            secret_name=test_secret.name)
        ],
        stop_grace_period=1,
        update_config=dict(parallelism=12, delay=7),
        user='******',
        workdir='/hostvar')
    self.wait_for_service_running(test_service)

    initial_service = self.api.services(desired_task_state='').matching(
        test_service.id).first_value

    def verify_all(service):
        # Assert the full Spec matches what the service was created with.
        self.assertIsNotNone(service)
        self.assertGreaterEqual(len(service.tasks), 2)
        self.assertEqual(
            service.image, test_service.attrs['Spec']['TaskTemplate']
            ['ContainerSpec']['Image'])
        self.assertEqual(service.name, test_service.name)
        self.assertEqual(
            len(service.raw.attrs['Spec']['EndpointSpec']['Ports']), 1)
        self.assertEqual(
            service.raw.attrs['Spec']['EndpointSpec']['Ports'][0]
            ['TargetPort'], 5000)
        self.assertEqual(
            service.raw.attrs['Spec']['EndpointSpec']['Ports'][0]
            ['PublishedPort'], 8080)
        self.assertEqual(service.raw.attrs['Spec']['Labels'],
                         {'pygen.service.label': 'label-on-service'})
        self.assertEqual(
            service.raw.attrs['Spec']['UpdateConfig']['Delay'], 7)
        self.assertEqual(
            service.raw.attrs['Spec']['UpdateConfig']['Parallelism'], 12)
        self.assertIn('Replicated', service.raw.attrs['Spec']['Mode'])
        self.assertEqual(
            service.raw.attrs['Spec']['Mode']['Replicated']['Replicas'], 2)
        # Networks moved between Spec and TaskTemplate across API versions.
        self.assertIn(
            test_network.id,
            (n.get('Target')
             for n in service.raw.attrs['Spec']['TaskTemplate'].get(
                 'Networks', service.raw.attrs['Spec'].get('Networks', {}))
             ))
        task_template = service.raw.attrs['Spec']['TaskTemplate']
        self.assertEqual(task_template['Placement']['Constraints'],
                         ['node.role==manager'])
        self.assertEqual(task_template['ContainerSpec']['Command'], ['sh'])
        self.assertEqual(task_template['ContainerSpec']['Args'],
                         ['-c', 'sleep 3600'])
        self.assertEqual(len(task_template['ContainerSpec']['Secrets']), 1)
        self.assertEqual(
            task_template['ContainerSpec']['Secrets'][0]['SecretID'],
            test_secret.id)
        self.assertEqual(
            task_template['ContainerSpec']['Secrets'][0]['SecretName'],
            test_secret.name)
        self.assertEqual(
            task_template['ContainerSpec']['Secrets'][0]['File']['Name'],
            test_secret.name)
        six.assertRegex(self, task_template['ContainerSpec']['Image'],
                        '^%s' % os.environ.get('TEST_IMAGE', 'alpine'))
        self.assertEqual(task_template['ContainerSpec']['Hostname'],
                         'pygen-swarm-test-512')
        self.assertEqual(task_template['ContainerSpec']['Labels'],
                         {'pygen.container.label': 'label-on-container'})
        self.assertEqual(task_template['ContainerSpec']['User'], 'root')
        self.assertEqual(task_template['ContainerSpec']['Env'],
                         ['PYGEN_CONTAINER_ENV=env-on-container'])
        self.assertEqual(len(task_template['ContainerSpec']['Mounts']), 1)
        self.assertEqual(
            task_template['ContainerSpec']['Mounts'][0]['Source'], '/var')
        self.assertEqual(
            task_template['ContainerSpec']['Mounts'][0]['Target'],
            '/hostvar')
        self.assertTrue(
            task_template['ContainerSpec']['Mounts'][0]['ReadOnly'])
        self.assertEqual(task_template['ContainerSpec']['StopGracePeriod'],
                         1)
        self.assertEqual(task_template['ContainerSpec']['Dir'], '/hostvar')
        # Fixed: these four used assertTrue(actual, expected), which treats
        # the second argument as the failure *message* and never compares —
        # the assertions were vacuous.  assertEqual performs the comparison.
        self.assertEqual(task_template['RestartPolicy']['Condition'],
                         'on-failure')
        self.assertEqual(task_template['RestartPolicy']['Delay'], 3)
        self.assertEqual(
            task_template['Resources']['Limits']['MemoryBytes'], 8128128)

    verify_all(initial_service)

    # Bump ForceUpdate so the swarm reschedules every task.
    initial_service.raw.update(force_update=20)

    self._wait_for_tasks(test_service, 4)

    current_service = self.api.services(desired_task_state='').matching(
        test_service.id).first_value

    self.assertGreater(current_service.version, initial_service.version)
    # The restart must have produced entirely new tasks...
    self.assertNotEqual(set(t.id for t in current_service.tasks),
                        set(t.id for t in initial_service.tasks))
    # ...while keeping every setting intact.
    verify_all(current_service)
def test_restart_service_retains_settings(self):
    """End-to-end check that pygen's --restart keeps a service's Spec intact.

    Spins up a two-node docker-in-docker swarm, creates a richly configured
    service, restarts it through a one-shot pygen container, and diffs the
    service Spec before and after.
    """
    from docker.types import EndpointSpec, Resources, RestartPolicy, SecretReference, UpdateConfig

    join_command = self.init_swarm()

    with self.with_dind_container() as second_dind:
        self.prepare_images('alpine', client=self.dind_client(second_dind))

        second_dind.exec_run(join_command)

        network = self.remote_client.networks.create('pygen-net',
                                                     driver='overlay')
        secret = self.remote_client.secrets.create(name='pygen-secret',
                                                   data='TopSecret')
        secret.reload()

        # Exercise as many creation options as possible so the diff below
        # covers the whole Spec.
        service = self.remote_client.services.create(
            'alpine',
            name='target-svc',
            mode='global',
            command='sh -c "date +%s ; sleep 3600"',
            constraints=['node.hostname != non-existing-node'],
            container_labels={'container.label': 'testing'},
            dns_config={'Nameservers': ['8.8.8.8']},
            endpoint_spec=EndpointSpec(mode='vip', ports={14002: 1234}),
            env=['TEST_ENV_VAR=12345'],
            labels={'service.label': 'on-service'},
            mounts=['/tmp:/data/hosttmp:ro'],
            networks=[network.name],
            resources=Resources(mem_limit=24000000),
            restart_policy=RestartPolicy(condition='any',
                                         delay=5,
                                         max_attempts=3),
            secrets=[
                SecretReference(secret_id=secret.id, secret_name=secret.name)
            ],
            stop_grace_period=1,
            update_config=UpdateConfig(parallelism=1,
                                       delay=1,
                                       monitor=7200000000),
            user='******',
            workdir='/data/hosttmp',
            tty=True)

        self.wait_for_service_start(service, num_tasks=2)

        service.reload()
        initial_spec = service.attrs['Spec']

        # Run pygen once to trigger the restart of the target service.
        command = ['--template #ok', '--restart target-svc', '--one-shot']

        self.remote_client.containers.run(
            'pygen-build',
            command=' '.join(command),
            remove=True,
            volumes=['/var/run/docker.sock:/var/run/docker.sock:ro'])

        self.wait_for_service_start(service, num_tasks=4)

        service = self.remote_client.services.get(service.id)
        service.reload()

        newer_spec = service.attrs['Spec']

        # ForceUpdate is bumped by the restart, so it is excluded from the
        # comparison.
        del initial_spec['TaskTemplate']['ForceUpdate']
        del newer_spec['TaskTemplate']['ForceUpdate']

        # Networks moved between Spec and TaskTemplate across API versions;
        # normalize both snapshots before diffing.
        initial_networks = initial_spec.pop(
            'Networks', initial_spec['TaskTemplate'].pop('Networks', []))
        newer_networks = newer_spec.pop(
            'Networks', newer_spec['TaskTemplate'].pop('Networks', []))

        self.maxDiff = None

        self.assertGreater(len(newer_networks), 0)
        self.assertEqual(newer_networks, initial_networks)
        self.assertDictEqual(newer_spec, initial_spec)
def start(self):
    """Start the single-user server in a docker service.

    You can specify the params for the service through jupyterhub_config.py
    or using the user_options.

    Returns:
        (ip, port) tuple — the service name (usable via swarm-mode service
        discovery) and the configured service port.

    Raises:
        Exception: when no container_spec can be derived, when a user-selected
            image does not exist or is not permitted, or when an attached
            config is misconfigured.
    """
    self.log.info("User: {}, start spawn".format(self.user.__dict__))
    # https://github.com/jupyterhub/jupyterhub
    # /blob/master/jupyterhub/user.py#L202
    # By default jupyterhub calls the spawner passing user_options
    if self.use_user_options:
        user_options = self.user_options
    else:
        user_options = {}

    service = yield self.get_service()
    if service is None:
        # Build the container_spec: admin-configured base, overridden by
        # user_options when allowed.
        if hasattr(self, 'container_spec') \
                and self.container_spec is not None:
            container_spec = dict(**self.container_spec)
        elif user_options == {}:
            self.log.error("User: {} is trying to create a service"
                           " without a container_spec".format(self.user))
            # NOTE: space added between "specification" and "to" — the
            # original adjacent-string concatenation produced
            # "specificationto launch it".
            raise Exception("That notebook is missing a specification "
                            "to launch it, contact the admin to resolve "
                            "this issue")
        else:
            # FIX: previously container_spec was left unbound (NameError)
            # when there was no configured spec but user_options were given.
            container_spec = {}

        container_spec.update(user_options.get('container_spec', {}))

        # Which image to spawn
        if self.use_user_options and 'user_selected_image' in user_options:
            uimage = user_options['user_selected_image']
            image_info = None
            for di in self.dockerimages:
                if di['image'] == uimage:
                    image_info = copy.deepcopy(di)
            if image_info is None:
                # FIX: uimage is the selected image name (a str); the
                # original `uimage['image']` raised TypeError here.
                err_msg = "User selected image: {} couldn't be found" \
                    .format(uimage)
                self.log.error(err_msg)
                raise Exception(err_msg)
        else:
            # Default image
            image_info = self.dockerimages[0]
        self.log.debug("Image info: {}".format(image_info))

        # Does that image have restricted access
        if 'access' in image_info:
            # Check for static or db users
            allowed = False
            if self.service_owner in image_info['access']:
                allowed = True
            else:
                # 'access' may instead be a path to a newline-separated
                # user database file.
                if os.path.exists(image_info['access']):
                    db_path = image_info['access']
                    try:
                        self.log.info("Checking db: {} for "
                                      "User: {}".format(db_path,
                                                        self.service_owner))
                        with open(db_path, 'r') as db:
                            users = [
                                user.rstrip('\n').rstrip('\r\n')
                                for user in db
                            ]
                            if self.service_owner in users:
                                allowed = True
                    except IOError as err:
                        # NOTE: space added after the comma — the original
                        # concatenation produced "file {},Failed {}".
                        self.log.error("User: {} tried to open db file {}, "
                                       "Failed {}".format(self.service_owner,
                                                          db_path, err))
            if not allowed:
                self.log.error(
                    "User: {} tried to launch {} without access".format(
                        self.service_owner, image_info['image']))
                raise Exception(
                    "You don't have permission to launch that image")

        self.log.debug("Container spec: {}".format(container_spec))

        # Setup mounts: global (from container_spec) first, then image mounts.
        mounts = list(container_spec.get('mounts', []))
        # FIX: always reset to a list — previously this key was only created
        # when the spec already had one, so image-only mounts hit a KeyError
        # on append below.
        container_spec['mounts'] = []
        if 'mounts' in image_info:
            mounts.extend(image_info['mounts'])
        for mount in mounts:
            if isinstance(mount, dict):
                m = VolumeMounter(mount)
                m = yield m.create(owner=self.service_owner)
            else:
                # Expects a mount_class that supports 'create'
                if hasattr(self.user, 'data'):
                    m = yield mount.create(self.user.data,
                                           owner=self.service_owner)
                else:
                    m = yield mount.create(owner=self.service_owner)
            container_spec['mounts'].append(m)

        # Some envs are required by the single-user-image
        if 'env' in container_spec:
            container_spec['env'].update(self.get_env())
        else:
            container_spec['env'] = self.get_env()
        # Env of image
        if 'env' in image_info and isinstance(image_info['env'], dict):
            container_spec['env'].update(image_info['env'])

        # Dynamic update of env values: '{attr}' placeholders are resolved
        # against the spawner, the user, and user.data (last match wins).
        for env_key, env_value in container_spec['env'].items():
            # Robustness: skip non-string values instead of crashing on
            # .lstrip below.
            if not isinstance(env_value, str):
                continue
            stripped_value = env_value.lstrip('{').rstrip('}')
            if hasattr(self, stripped_value) \
                    and isinstance(getattr(self, stripped_value), str):
                container_spec['env'][env_key] = getattr(
                    self, stripped_value)
            if hasattr(self.user, stripped_value) \
                    and isinstance(getattr(self.user, stripped_value), str):
                container_spec['env'][env_key] = getattr(
                    self.user, stripped_value)
            if hasattr(self.user, 'data') \
                    and hasattr(self.user.data, stripped_value) \
                    and isinstance(getattr(self.user.data, stripped_value),
                                   str):
                container_spec['env'][env_key] = getattr(
                    self.user.data, stripped_value)

        # Args of image
        if 'args' in image_info and isinstance(image_info['args'], list):
            container_spec.update({'args': image_info['args']})

        # Command of image (list or str)
        if isinstance(image_info.get('command'), (list, str)):
            container_spec.update({'command': image_info['command']})

        # Log mounts config
        self.log.debug("User: {} container_spec mounts: {}".format(
            self.user, container_spec['mounts']))

        # Global resource_spec, overridden by user_options then by the image.
        resource_spec = {}
        if hasattr(self, 'resource_spec'):
            resource_spec = self.resource_spec
        resource_spec.update(user_options.get('resource_spec', {}))

        networks = None
        if hasattr(self, 'networks'):
            networks = self.networks
        if user_options.get('networks') is not None:
            networks = user_options.get('networks')

        # Global placement, overridden by user_options then by the image.
        placement = None
        if hasattr(self, 'placement'):
            placement = self.placement
        if user_options.get('placement') is not None:
            placement = user_options.get('placement')

        # Image to spawn
        image = image_info['image']

        # Image resources
        if 'resource_spec' in image_info:
            resource_spec = image_info['resource_spec']

        # Placement of image
        if 'placement' in image_info:
            placement = image_info['placement']

        # Configs attached to image
        if 'configs' in image_info and isinstance(image_info['configs'],
                                                  list):
            for c in image_info['configs']:
                if isinstance(c, dict):
                    self.configs.append(c)

        if self.configs:
            # Check that the supplied configs already exist in the swarm.
            current_configs = yield self.docker('configs')
            config_error_msg = "The server has a misconfigured config, " \
                               "please contact an administrator to resolve this"
            for c in self.configs:
                if 'config_name' not in c:
                    self.log.error("Config: {} does not have a "
                                   "required config_name key".format(c))
                    raise Exception(config_error_msg)
                if 'config_id' not in c:
                    # Find the id from the supplied name
                    config_ids = [
                        cc['ID'] for cc in current_configs
                        if cc['Spec']['Name'] == c['config_name']
                    ]
                    if not config_ids:
                        # FIX: the original log call had no .format()
                        # argument and logged the literal "{}" placeholder.
                        self.log.error(
                            "A config with name {} could not be found".format(
                                c['config_name']))
                        raise Exception(config_error_msg)
                    c['config_id'] = config_ids[0]
            container_spec.update(
                {'configs': [ConfigReference(**c) for c in self.configs]})

        # Global container user, expressed as "uid" or "uid:gid"; the
        # '{uid}'/'{gid}' placeholders resolve to the authenticated user.
        uid_gid = None
        if 'uid_gid' in container_spec:
            uid_gid = copy.deepcopy(container_spec['uid_gid'])
            del container_spec['uid_gid']
        # Image user
        if 'uid_gid' in image_info:
            uid_gid = image_info['uid_gid']
        self.log.info("gid info {}".format(uid_gid))
        if isinstance(uid_gid, str):
            if ":" in uid_gid:
                uid, gid = uid_gid.split(":")
            else:
                uid, gid = uid_gid, None
            if uid == '{uid}' and hasattr(self.user, 'uid') \
                    and self.user.uid is not None:
                uid = self.user.uid
            if gid is not None and gid == '{gid}' \
                    and hasattr(self.user, 'gid') \
                    and self.user.gid is not None:
                gid = self.user.gid
            if uid:
                container_spec.update({'user': str(uid)})
            if uid and gid:
                container_spec.update({'user': str(uid) + ":" + str(gid)})

        # Global container user
        if 'user' in container_spec:
            container_spec['user'] = str(container_spec['user'])
        # Image user
        if 'user' in image_info:
            container_spec.update({'user': str(image_info['user'])})

        dynamic_holders = [Spawner, self, self.user]
        if hasattr(self.user, 'data'):
            dynamic_holders.append(self.user.data)

        # Expand remaining template placeholders in container_spec before
        # start; TypeError from unformattable values is deliberately ignored.
        for construct in dynamic_holders:
            try:
                if not hasattr(construct, '__dict__'):
                    continue
                recursive_format(container_spec, construct.__dict__)
            except TypeError:
                pass

        # Create the service
        container_spec = ContainerSpec(image, **container_spec)
        resources = Resources(**resource_spec)
        # FIX: placement may legitimately still be None here; the original
        # `Placement(**placement)` raised TypeError in that case.
        placement = Placement(**(placement or {}))
        task_spec = {
            'container_spec': container_spec,
            'resources': resources,
            'placement': placement
        }
        task_tmpl = TaskTemplate(**task_spec)
        self.log.info("task temp: {}".format(task_tmpl))
        resp = yield self.docker('create_service',
                                 task_tmpl,
                                 name=self.service_name,
                                 networks=networks)
        self.service_id = resp['ID']
        self.log.info("Created Docker service {} (id: {}) from image {}"
                      " for user {}".format(self.service_name,
                                            self.service_id[:7], image,
                                            self.user))
        yield self.wait_for_running_tasks()
    else:
        self.log.info("Found existing Docker service '{}' (id: {})".format(
            self.service_name, self.service_id[:7]))
        # Handle re-using API token.
        # Get the API token from the environment variables
        # of the running service:
        envs = service['Spec']['TaskTemplate']['ContainerSpec']['Env']
        for line in envs:
            if line.startswith('JPY_API_TOKEN='):
                self.api_token = line.split('=', 1)[1]
                break

    ip = self.service_name
    port = self.service_port
    self.log.debug("Active service: '{}' with user '{}'".format(
        self.service_name, self.user))
    # we use service_name instead of ip
    # https://docs.docker.com/engine/swarm/networking/#use-swarm-mode-service-discovery
    # service_port is actually equal to 8888
    return ip, port