Example #1
0
    def swarm_resources(self):
        """Translate ``self.resources`` into a docker ``Resources`` spec.

        Returns ``None`` when no resources are configured. Otherwise the
        CPU figures found under ``limits`` / ``requests`` are multiplied by
        ``self._CPU_RATE`` and truncated to ``int``, and the memory figures
        are multiplied by ``self._MEM_RATE``. When the ``enable_gpu`` entry
        is truthy, one generic ``gpu`` resource is requested as well.
        """
        spec = self.resources
        if spec is None:
            return None

        # Read the GPU flag before touching limits/requests.
        wants_gpu = spec.get('enable_gpu', False)

        # Both the GPU and the non-GPU case share these four settings.
        kwargs = {
            'cpu_limit': int(spec['limits']['cpu'] * self._CPU_RATE),
            'mem_limit': spec['limits']['memory'] * self._MEM_RATE,
            'cpu_reservation': int(spec['requests']['cpu'] * self._CPU_RATE),
            'mem_reservation': spec['requests']['memory'] * self._MEM_RATE,
        }
        if wants_gpu:
            kwargs['generic_resources'] = {'gpu': 1}

        return Resources(**kwargs)
Example #2
0
    def swarm_resources(self):
        """Translate ``self.resources`` into a docker ``Resources`` spec.

        Returns ``None`` when no resources are configured. Every setting is
        optional: a missing (or falsy) CPU/memory value is passed on as
        ``None``. CPU values are scaled by ``self._CPU_RATE`` and truncated
        to ``int``; memory values are scaled by ``self._MEM_RATE``. A single
        generic ``gpu`` resource is requested when ``enable_gpu`` is truthy.

        The assembled mapping is run through ``self.cleaner`` before being
        handed to ``Resources`` (presumably to drop the ``None`` entries --
        confirm against the ``cleaner`` implementation).
        """
        if self.resources is None:
            return None

        spec = self.resources
        limits = spec.get('limits', {})
        requests = spec.get('requests', {})

        raw_cpu_limit = limits.get('cpu')
        raw_mem_limit = limits.get('memory')
        raw_cpu_reservation = requests.get('cpu')
        raw_mem_reservation = requests.get('memory')

        # Start with everything unset and fill in what was configured.
        settings = {
            'cpu_limit': None,
            'mem_limit': None,
            'cpu_reservation': None,
            'mem_reservation': None,
            'generic_resources': None,
        }
        if raw_cpu_limit:
            settings['cpu_limit'] = int(raw_cpu_limit * self._CPU_RATE)
        if raw_mem_limit:
            settings['mem_limit'] = raw_mem_limit * self._MEM_RATE
        if raw_cpu_reservation:
            settings['cpu_reservation'] = int(
                raw_cpu_reservation * self._CPU_RATE)
        if raw_mem_reservation:
            settings['mem_reservation'] = raw_mem_reservation * self._MEM_RATE
        if spec.get('enable_gpu', False):
            settings['generic_resources'] = {'gpu': 1}

        return Resources(**self.cleaner(settings))
Example #3
0
    def create_object(self):
        """Create the docker service that runs the single-user server.

        Generator-based coroutine. Each docker spec (container, resources,
        placement, task template, endpoint) is built from the spawner's own
        settings and then overridden by the matching ``extra_*`` mapping.
        The host directory backing the ``/home/jovyan/work`` mount is
        created when it does not exist yet.

        Returns:
            The result of the ``create_service`` docker call.
        """
        # --- container -----------------------------------------------------
        image = self.image
        environment = self.get_env()
        command = yield self.get_command()
        container_kwargs = {
            "image": image,
            "env": environment,
            "args": command,
            "mounts": self.mounts,
        }
        container_kwargs.update(self.extra_container_spec)
        container_spec = ContainerSpec(**container_kwargs)

        # Make sure the source directory of the work mount is present.
        for mount in self.mounts:
            if mount['Target'] != '/home/jovyan/work':
                continue
            if not os.path.exists(mount['Source']):
                os.makedirs(mount['Source'])

        # --- resources -----------------------------------------------------
        # CPU settings are converted to integer nano-CPUs; unset means None.
        resources_kwargs = {
            "mem_limit": self.mem_limit,
            "mem_reservation": self.mem_guarantee,
            "cpu_limit": None,
            "cpu_reservation": None,
        }
        if self.cpu_limit:
            resources_kwargs["cpu_limit"] = int(self.cpu_limit * 1e9)
        if self.cpu_guarantee:
            resources_kwargs["cpu_reservation"] = int(self.cpu_guarantee * 1e9)
        resources_kwargs.update(self.extra_resources_spec)
        resources_spec = Resources(**resources_kwargs)

        # --- placement -----------------------------------------------------
        placement_kwargs = {
            "constraints": None,
            "preferences": None,
            "platforms": None,
        }
        placement_kwargs.update(self.extra_placement_spec)
        placement_spec = Placement(**placement_kwargs)

        # --- task template -------------------------------------------------
        if self.network_name:
            networks = [self.network_name]
        else:
            networks = []
        task_kwargs = {
            "container_spec": container_spec,
            "resources": resources_spec,
            "networks": networks,
            "placement": placement_spec,
        }
        task_kwargs.update(self.extra_task_spec)
        task_spec = TaskTemplate(**task_kwargs)

        # --- endpoint ------------------------------------------------------
        # The port is only published when the internal IP is not used.
        endpoint_kwargs = {}
        if not self.use_internal_ip:
            endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
        endpoint_kwargs.update(self.extra_endpoint_spec)
        endpoint_spec = EndpointSpec(**endpoint_kwargs)

        # --- service -------------------------------------------------------
        create_kwargs = {
            "task_template": task_spec,
            "endpoint_spec": endpoint_spec,
            "name": self.service_name,
        }
        create_kwargs.update(self.extra_create_kwargs)

        service = yield self.docker("create_service", **create_kwargs)
        return service
Example #4
0
    def create_object(self):
        """Create the docker service that runs the single-user server.

        Generator-based coroutine. Each docker spec (container, resources,
        placement, task template, endpoint) is built from the spawner's own
        settings and then overridden by the matching ``extra_*`` mapping.
        After the service is created the coroutine pauses for one second
        and logs the creation arguments at debug level.

        Returns:
            The result of the ``create_service`` docker call.
        """
        # Container spec: spawner defaults, then user overrides.
        image = self.image
        environment = self.get_env()
        command = yield self.get_command()
        container_kwargs = {
            "image": image,
            "env": environment,
            "args": command,
            "mounts": self.mounts,
        }
        container_kwargs.update(self.extra_container_spec)
        container_spec = ContainerSpec(**container_kwargs)

        # Resource spec: CPU values become integer nano-CPUs, unset -> None.
        resources_kwargs = {
            "mem_limit": self.mem_limit,
            "mem_reservation": self.mem_guarantee,
            "cpu_limit": None,
            "cpu_reservation": None,
        }
        if self.cpu_limit:
            resources_kwargs["cpu_limit"] = int(self.cpu_limit * 1e9)
        if self.cpu_guarantee:
            resources_kwargs["cpu_reservation"] = int(self.cpu_guarantee * 1e9)
        resources_kwargs.update(self.extra_resources_spec)
        resources_spec = Resources(**resources_kwargs)

        # Placement spec: nothing is constrained unless overridden.
        placement_kwargs = {
            "constraints": None,
            "preferences": None,
            "platforms": None,
        }
        placement_kwargs.update(self.extra_placement_spec)
        placement_spec = Placement(**placement_kwargs)

        # Task template tying the pieces together.
        networks = [self.network_name] if self.network_name else []
        task_kwargs = {
            "container_spec": container_spec,
            "resources": resources_spec,
            "networks": networks,
            "placement": placement_spec,
        }
        task_kwargs.update(self.extra_task_spec)
        task_spec = TaskTemplate(**task_kwargs)

        # Endpoint spec: publish the port unless the internal IP is used.
        endpoint_kwargs = {}
        if not self.use_internal_ip:
            endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
        endpoint_kwargs.update(self.extra_endpoint_spec)
        endpoint_spec = EndpointSpec(**endpoint_kwargs)

        create_kwargs = {
            "task_template": task_spec,
            "endpoint_spec": endpoint_spec,
            "name": self.service_name,
        }
        create_kwargs.update(self.extra_create_kwargs)
        created = yield self.docker("create_service", **create_kwargs)

        # Chenglu added: calling inspect_service right after create_service
        # may raise a "Service not found" error, so give docker a moment.
        yield gen.sleep(1)

        serialized_kwargs = json.dumps(create_kwargs)
        self.log.debug("Docker >>> create_service with %s", serialized_kwargs)
        return created
Example #5
0
def createService(image, command, cpuRequirments, name, labels, selectedNodeId):
    """Create a one-replica, never-restarting swarm service on a given worker.

    Args:
        image: Image the service runs.
        command: Command executed inside the container.
        cpuRequirments: CPU reservation expressed in CPUs; may be fractional
            (for example ``0.5``).
        name: Name of the service.
        labels: Mapping from which only the ``instance_name`` entry is
            copied onto the service labels.
        selectedNodeId: Id of the swarm node the service is pinned to. It is
            used in the placement constraint and also as the hostname.
    """
    client = docker.from_env()

    # The reservation is expressed in integer nano-CPUs. Without the explicit
    # conversion a fractional request (0.5 CPUs) would be sent as a float.
    cpuRequirmentsInNanoSeconds = int(round(cpuRequirments * 1000000000))

    client.services.create(
        image,
        command,
        constraints=["node.role == worker", "node.id == " + selectedNodeId],
        mode=ServiceMode("replicated", 1),
        restart_policy=RestartPolicy(condition='none'),
        resources=Resources(cpu_reservation=cpuRequirmentsInNanoSeconds),
        name=name,
        labels={"instance_name": labels.get("instance_name")},
        hostname=selectedNodeId,
    )
Example #6
0
    def create_object(self):
        """Create the docker service that runs the single-user server.

        Generator-based coroutine. The container, resources, task template
        and endpoint specs are each built from the spawner's own settings
        and then overridden by the matching ``extra_*`` mapping.

        Returns:
            The result of the ``create_service`` docker call.
        """
        # Container spec.
        image = self.image
        environment = self.get_env()
        command = yield self.get_command()
        container_kwargs = {
            "image": image,
            "env": environment,
            "args": command,
            "mounts": self.mounts,
        }
        container_kwargs.update(self.extra_container_spec)
        container_spec = ContainerSpec(**container_kwargs)

        # Resource spec; CPU values are converted to integer nano-CPUs.
        resources_kwargs = {
            "mem_limit": self.mem_limit,
            "mem_reservation": self.mem_guarantee,
            "cpu_limit": None,
            "cpu_reservation": None,
        }
        if self.cpu_limit:
            resources_kwargs["cpu_limit"] = int(self.cpu_limit * 1e9)
        if self.cpu_guarantee:
            resources_kwargs["cpu_reservation"] = int(self.cpu_guarantee * 1e9)
        resources_kwargs.update(self.extra_resources_spec)
        resources_spec = Resources(**resources_kwargs)

        # Task template; attach the network only when one is configured.
        if self.network_name:
            networks = [self.network_name]
        else:
            networks = []
        task_kwargs = {
            "container_spec": container_spec,
            "resources": resources_spec,
            "networks": networks,
        }
        task_kwargs.update(self.extra_task_spec)
        task_spec = TaskTemplate(**task_kwargs)

        # Endpoint spec; publish the port unless the internal IP is used.
        endpoint_kwargs = {}
        if not self.use_internal_ip:
            endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
        endpoint_kwargs.update(self.extra_endpoint_spec)
        endpoint_spec = EndpointSpec(**endpoint_kwargs)

        create_kwargs = {
            "task_template": task_spec,
            "endpoint_spec": endpoint_spec,
            "name": self.service_name,
        }
        create_kwargs.update(self.extra_create_kwargs)

        service = yield self.docker("create_service", **create_kwargs)
        return service
Example #7
0
    def create_object(self):
        """Create the docker service that runs the single-user server.

        Generator-based coroutine. Each docker spec (container, resources,
        placement, task template, endpoint) is built from the spawner's own
        settings and then overridden by the matching ``extra_*`` mapping.
        After creating the service, the docker ``tasks`` listing is polled
        once per second until at least one task is reported for it.

        NOTE(review): the polling loop has no upper bound of its own.

        Returns:
            The result of the ``create_service`` docker call.
        """
        # Container spec.
        image = self.image
        environment = self.get_env()
        command = yield self.get_command()
        container_kwargs = {
            "image": image,
            "env": environment,
            "args": command,
            "mounts": self.mounts,
        }
        container_kwargs.update(self.extra_container_spec)
        container_spec = ContainerSpec(**container_kwargs)

        # Resource spec; CPU values are converted to integer nano-CPUs.
        resources_kwargs = {
            "mem_limit": self.mem_limit,
            "mem_reservation": self.mem_guarantee,
            "cpu_limit": None,
            "cpu_reservation": None,
        }
        if self.cpu_limit:
            resources_kwargs["cpu_limit"] = int(self.cpu_limit * 1e9)
        if self.cpu_guarantee:
            resources_kwargs["cpu_reservation"] = int(self.cpu_guarantee * 1e9)
        resources_kwargs.update(self.extra_resources_spec)
        resources_spec = Resources(**resources_kwargs)

        # Placement spec.
        placement_kwargs = {
            "constraints": None,
            "preferences": None,
            "platforms": None,
        }
        placement_kwargs.update(self.extra_placement_spec)
        placement_spec = Placement(**placement_kwargs)

        # Task template.
        networks = [self.network_name] if self.network_name else []
        task_kwargs = {
            "container_spec": container_spec,
            "resources": resources_spec,
            "networks": networks,
            "placement": placement_spec,
        }
        task_kwargs.update(self.extra_task_spec)
        task_spec = TaskTemplate(**task_kwargs)

        # Endpoint spec; publish the port unless the internal IP is used.
        endpoint_kwargs = {}
        if not self.use_internal_ip:
            endpoint_kwargs["ports"] = {None: (self.port, "tcp")}
        endpoint_kwargs.update(self.extra_endpoint_spec)
        endpoint_spec = EndpointSpec(**endpoint_kwargs)

        create_kwargs = {
            "task_template": task_spec,
            "endpoint_spec": endpoint_spec,
            "name": self.service_name,
        }
        create_kwargs.update(self.extra_create_kwargs)

        service = yield self.docker("create_service", **create_kwargs)

        # Wait until docker lists at least one task for the new service.
        tasks = yield self.docker(
            "tasks",
            filters={"service": self.service_name},
        )
        while len(tasks) == 0:
            yield gen.sleep(1.0)
            tasks = yield self.docker(
                "tasks",
                filters={"service": self.service_name},
            )

        return service
Example #8
0
    def start(self):
        """Start the single-user server in a docker service.

        You can specify the params for the service through
        jupyterhub_config.py or using the user_options.

        Generator-based coroutine. When no service exists yet for the user,
        one is assembled from the spawner-wide settings, the selected
        image's settings and (when ``use_user_options`` is set) the
        user-supplied options, and is then created through the docker API.
        When a service already exists, the API token is recovered from the
        environment of that service instead.

        Returns:
            tuple: ``(self.service_name, self.service_port)``. The service
            name is returned in place of an IP address.

        Raises:
            Exception: if no container spec is available, the selected
                image cannot be found, the user has no access to the image,
                or one of the configs is misconfigured.
        """
        self.log.debug("User: {}, start spawn".format(self.user.__dict__))

        # https://github.com/jupyterhub/jupyterhub
        # /blob/master/jupyterhub/user.py#L202
        # By default jupyterhub calls the spawner passing user_options
        if self.use_user_options:
            user_options = self.user_options
        else:
            user_options = {}

        service = yield self.get_service()
        if service is None:
            # Validate state.
            # Start from an empty spec so a spawner without its own
            # container_spec can still be driven by user_options alone
            # (previously this path failed on an unbound local variable).
            container_spec = {}
            if hasattr(self,
                       "container_spec") and self.container_spec is not None:
                container_spec = dict(**self.container_spec)
            elif user_options == {}:
                self.log.error("User: {} is trying to create a service"
                               " without a container_spec".format(self.user))
                raise Exception("That notebook is missing a specification "
                                "to launch it, contact the admin to resolve "
                                "this issue")

            # Setup service
            container_spec.update(user_options.get("container_spec", {}))

            # Which image to spawn
            if self.use_user_options and "user_selected_image" in user_options:
                self.log.debug(
                    "User options received: {}".format(user_options))
                image_name = user_options["user_selected_name"]
                image_value = user_options["user_selected_image"]
                selected_image = None
                for di in self.images:
                    if image_name == di["name"] and image_value == di["image"]:
                        selected_image = copy.deepcopy(di)
                if selected_image is None:
                    err_msg = "User selected image: {} couldn't be found".format(
                        image_value)
                    self.log.error(err_msg)
                    raise Exception(err_msg)
                self.log.info(
                    "Using the user selected image: {}".format(selected_image))
            else:
                # Default image
                selected_image = self.images[0]
                self.log.info(
                    "Using the default image: {}".format(selected_image))

            self.log.debug("Image info: {}".format(selected_image))
            # Does that image have restricted access
            if "access" in selected_image:
                # Check for static or db users
                access = selected_image["access"]
                allowed = False
                if isinstance(access, str):
                    # A string is either a single user name or the path of
                    # a user db file. Compare for equality: a plain ``in``
                    # would grant access on any substring of the path.
                    if self.service_owner == access:
                        allowed = True
                    elif os.path.exists(access):
                        db_path = access
                        try:
                            self.log.info("Checking db: {} for "
                                          "User: {}".format(
                                              db_path, self.service_owner))
                            with open(db_path, "r") as db:
                                users = [user.rstrip("\r\n") for user in db]
                                if self.service_owner in users:
                                    allowed = True
                        except IOError as err:
                            self.log.error("User: {} tried to open db file {},"
                                           "Failed {}".format(
                                               self.service_owner, db_path,
                                               err))
                elif self.service_owner in access:
                    # Static collection of user names. It is never handed
                    # to os.path.exists, which rejects non-path values.
                    allowed = True
                if not allowed:
                    self.log.error(
                        "User: {} tried to launch {} without access".format(
                            self.service_owner, selected_image["image"]))
                    raise Exception(
                        "You don't have permission to launch that image")

            self.log.debug("Container spec: {}".format(container_spec))

            # Assign the image name as a label
            container_spec["labels"] = {"image_name": selected_image["name"]}

            # Setup mounts
            mounts = []
            # Global mounts
            if "mounts" in container_spec:
                mounts.extend(container_spec["mounts"])
            container_spec["mounts"] = []

            # Image mounts
            if "mounts" in selected_image:
                mounts.extend(selected_image["mounts"])

            for mount in mounts:
                if isinstance(mount, dict):
                    m = VolumeMounter(mount)
                    m = yield m.create(owner=self.service_owner)
                else:
                    # Expects a mount_class that supports 'create'
                    if hasattr(self.user, "data"):
                        m = yield mount.create(self.user.data,
                                               owner=self.service_owner)
                    else:
                        m = yield mount.create(owner=self.service_owner)
                container_spec["mounts"].append(m)

            # Some envs are required by the single-user-image.
            # Build a fresh dict: container_spec is only a shallow copy of
            # the spawner-wide spec, so updating its env in place would
            # write this user's environment into the shared settings.
            env = dict(container_spec["env"]) if "env" in container_spec else {}
            env.update(self.get_env())
            container_spec["env"] = env

            # Env of image
            if "env" in selected_image and isinstance(selected_image["env"],
                                                      dict):
                container_spec["env"].update(selected_image["env"])

            # Dynamic update of env values: a value that, once stripped of
            # its surrounding braces, names a string attribute of the
            # spawner, the user or user.data is replaced by that attribute.
            # Later holders win over earlier ones.
            for env_key, env_value in container_spec["env"].items():
                if not isinstance(env_value, str):
                    # Only string values can be placeholders.
                    continue
                stripped_value = env_value.lstrip("{").rstrip("}")
                if hasattr(self, stripped_value) and isinstance(
                        getattr(self, stripped_value), str):
                    container_spec["env"][env_key] = getattr(
                        self, stripped_value)
                if hasattr(self.user, stripped_value) and isinstance(
                        getattr(self.user, stripped_value), str):
                    container_spec["env"][env_key] = getattr(
                        self.user, stripped_value)
                if (hasattr(self.user, "data")
                        and hasattr(self.user.data, stripped_value)
                        and isinstance(getattr(self.user.data, stripped_value),
                                       str)):
                    container_spec["env"][env_key] = getattr(
                        self.user.data, stripped_value)

            # Args of image
            if "args" in selected_image and isinstance(selected_image["args"],
                                                       list):
                container_spec.update({"args": selected_image["args"]})

            # Command of image (either a list or a single string)
            if "command" in selected_image and isinstance(
                    selected_image["command"], (list, str)):
                container_spec.update({"command": selected_image["command"]})

            # Log mounts config
            self.log.debug("User: {} container_spec mounts: {}".format(
                self.user, container_spec["mounts"]))

            # Global resource_spec
            resource_spec = {}
            if hasattr(self, "resource_spec"):
                # Copy so the per-user options merged below never end up
                # in the spawner-wide resource_spec.
                resource_spec = dict(self.resource_spec)
            resource_spec.update(user_options.get("resource_spec", {}))

            networks = None
            if hasattr(self, "networks"):
                networks = self.networks
            if user_options.get("networks") is not None:
                networks = user_options.get("networks")

            # Global Log driver
            log_driver = None
            if hasattr(self, "log_driver"):
                log_driver = self.log_driver
            if user_options.get("log_driver") is not None:
                log_driver = user_options.get("log_driver")

            accelerators = []
            if hasattr(self, "accelerators"):
                accelerators = self.accelerators
            if user_options.get("accelerators") is not None:
                accelerators = user_options.get("accelerators")

            # Global placement
            placement = None
            if hasattr(self, "placement"):
                placement = self.placement
            if user_options.get("placement") is not None:
                placement = user_options.get("placement")

            # Image to spawn
            image = selected_image["image"]

            # Image resources
            if "resource_spec" in selected_image:
                resource_spec = selected_image["resource_spec"]

            # Accelerators attached to the image
            if "accelerators" in selected_image:
                accelerators = selected_image["accelerators"]

            # Placement of image
            if "placement" in selected_image:
                placement = selected_image["placement"]

            # Logdriver of image
            if "log_driver" in selected_image:
                log_driver = selected_image["log_driver"]

            # Configs attached to image
            # NOTE(review): these are appended to self.configs on every
            # call, so repeated spawns may accumulate duplicates -- confirm
            # whether other code relies on that before changing it.
            if "configs" in selected_image and isinstance(
                    selected_image["configs"], list):
                for c in selected_image["configs"]:
                    if isinstance(c, dict):
                        self.configs.append(c)

            endpoint_spec = {}
            if "endpoint_spec" in selected_image:
                endpoint_spec = selected_image["endpoint_spec"]

            if self.configs:
                # Check that the supplied configs already exists
                current_configs = yield self.docker("configs")
                config_error_msg = (
                    "The server has a misconfigured config, "
                    "please contact an administrator to resolve this")

                for c in self.configs:
                    if "config_name" not in c:
                        self.log.error("Config: {} does not have a "
                                       "required config_name key".format(c))
                        raise Exception(config_error_msg)
                    if "config_id" not in c:
                        # Find the id from the supplied name
                        config_ids = [
                            cc["ID"] for cc in current_configs
                            if cc["Spec"]["Name"] == c["config_name"]
                        ]
                        if not config_ids:
                            self.log.error(
                                "A config with name {} could not be found".
                                format(c["config_name"]))
                            raise Exception(config_error_msg)
                        c["config_id"] = config_ids[0]

                container_spec.update(
                    {"configs": [ConfigReference(**c) for c in self.configs]})

            # Prepare the accelerators and attach it to the environment
            if accelerators:
                for accelerator in accelerators:
                    accelerator_id = accelerator.aquire(self.user.name)
                    # NVIDIA_VISIBLE_DEVICES=0:0
                    container_spec["env"][
                        "NVIDIA_VISIBLE_DEVICES"] = "{}".format(accelerator_id)

            # Global container user
            uid_gid = None
            if "uid_gid" in container_spec:
                uid_gid = copy.deepcopy(container_spec["uid_gid"])
                del container_spec["uid_gid"]

            # Image user
            if "uid_gid" in selected_image:
                uid_gid = selected_image["uid_gid"]

            self.log.info("gid info {}".format(uid_gid))
            if isinstance(uid_gid, str):
                if ":" in uid_gid:
                    uid, gid = uid_gid.split(":")
                else:
                    uid, gid = uid_gid, None

                # The "{uid}" / "{gid}" placeholders are replaced with the
                # user's own ids when those are available.
                if (uid == "{uid}" and hasattr(self.user, "uid")
                        and self.user.uid is not None):
                    uid = self.user.uid

                if (gid is not None and gid == "{gid}"
                        and hasattr(self.user, "gid")
                        and self.user.gid is not None):
                    gid = self.user.gid

                if uid:
                    container_spec.update({"user": str(uid)})
                if uid and gid:
                    container_spec.update({"user": str(uid) + ":" + str(gid)})

            # Global container user
            if "user" in container_spec:
                container_spec["user"] = str(container_spec["user"])

            # Image user
            if "user" in selected_image:
                container_spec.update({"user": str(selected_image["user"])})

            dynamic_holders = [Spawner, self, self.user]
            if hasattr(self.user, "data"):
                dynamic_holders.append(self.user.data)

            # Expand container_spec before start
            for construct in dynamic_holders:
                try:
                    if not hasattr(construct, "__dict__"):
                        continue
                    recursive_format(container_spec, construct.__dict__)
                except TypeError:
                    pass

            # Log driver
            log_driver_name, log_driver_options = None, None
            if log_driver and isinstance(log_driver, dict):
                if "name" in log_driver:
                    log_driver_name = log_driver["name"]
                if "options" in log_driver:
                    log_driver_options = log_driver["options"]

            # Create the service
            container_spec = ContainerSpec(image, **container_spec)
            resources = Resources(**resource_spec)
            # placement stays None when neither the spawner, the user
            # options nor the image configure one; fall back to an empty
            # mapping instead of failing on ``**None``.
            placement = Placement(**(placement or {}))

            task_log_driver = None
            if log_driver_name:
                task_log_driver = DriverConfig(log_driver_name,
                                               options=log_driver_options)

            task_spec = {
                "container_spec": container_spec,
                "resources": resources,
                "placement": placement,
            }

            if task_log_driver:
                task_spec.update({"log_driver": task_log_driver})

            task_tmpl = TaskTemplate(**task_spec)
            self.log.debug("task temp: {}".format(task_tmpl))
            # Set endpoint spec
            endpoint_spec = EndpointSpec(**endpoint_spec)

            resp = yield self.docker(
                "create_service",
                task_tmpl,
                name=self.service_name,
                networks=networks,
                endpoint_spec=endpoint_spec,
            )
            self.service_id = resp["ID"]
            self.log.info("Created Docker service {} (id: {}) from image {}"
                          " for user {}".format(self.service_name,
                                                self.service_id[:7], image,
                                                self.user))

            yield self.wait_for_running_tasks()

        else:
            self.log.info("Found existing Docker service '{}' (id: {})".format(
                self.service_name, self.service_id[:7]))
            # Handle re-using API token.
            # Get the API token from the environment variables
            # of the running service:
            envs = service["Spec"]["TaskTemplate"]["ContainerSpec"]["Env"]
            for line in envs:
                if line.startswith("JPY_API_TOKEN="):
                    self.api_token = line.split("=", 1)[1]
                    break

        ip = self.service_name
        port = self.service_port
        self.log.debug("Active service: '{}' with user '{}'".format(
            self.service_name, self.user))

        # we use service_name instead of ip
        # https://docs.docker.com/engine/swarm/networking/#use-swarm-mode-service-discovery
        # service_port is actually equal to 8888
        return ip, port
Example #9
0
# Service-watch loop: each message read from the unix socket server selects
# the image tag of the assembly coordinator service and decides whether the
# "robot3_workertask9" helper service should be running.
server = UnixServer(address)
client = docker.from_env()
# Registered so removeservice runs with this client when the process exits.
atexit.register(removeservice, docker_client=client)
while True:
    try:
        message_received = server.read_data()
        image_name = "127.0.0.1:5000/r3_assembly_coordinator:{}".format(
            message_received)
        workertask_name = "127.0.0.1:5000/workertask9"

        # Look up the helper service once; both cases below need it.
        workertask_service = client.services.list(
            filters={"name": "robot3_workertask9"})
        if message_received == "no_cushion":
            # "no_cushion" means the helper service must not be running.
            if workertask_service:
                workertask_service[0].remove()
        elif not workertask_service:
            # Any other message requires the helper; create it if missing.
            client.services.create(
                image=workertask_name,
                name="robot3_workertask9",
                constraints=["node.hostname==GOCASr3"],
                resources=Resources(mem_limit=52428800),
                mounts=["/tmp:/tmp:rw"],
                env=["PYTHONUNBUFFERED=1"],
            )

        print(image_name)
        # Point the coordinator service at the image tag from the message.
        service = client.services.list(
            filters={"name": "robot3_r3_assembly_coordinator"})
        service[0].update(image=image_name)
    except ConnectionResetError as e:
        print(e)
Example #10
0
def gen_director_service_params(  # pylint: disable=unused-argument
        client: DockerClient, site_id: int,
        site_data: Dict[str, Any]) -> Dict[str, Any]:
    """Build the parameters of the Docker service that serves a site.

    The shared parameters from ``gen_director_shared_params`` are extended
    with the service-specific settings: name, read-only flag, the launcher
    shell command, resource limits, logging, endpoint, replica mode,
    restart policy and update config.

    Args:
        client: Docker client, forwarded to ``gen_director_shared_params``.
        site_id: Id of the site; also used to derive the service name.
        site_data: Site description. The ``resource_limits`` entry (``cpus``
            and ``mem_limit``) and the ``is_being_served`` entry are read
            here.

    Returns:
        The complete parameter mapping for the service.
    """
    params = gen_director_shared_params(client, site_id, site_data)

    # The server inside the container always listens on 0.0.0.0:80.
    env = params.pop("env", [])
    for env_name, env_val in (("PORT", "80"), ("HOST", "0.0.0.0")):
        env.append("{}={}".format(env_name, env_val))

    # We do the run.sh path detection in the shell so that it can adapt to the path changing without
    # updating the Docker service
    # The killing of the child process is based off of
    # https://unix.stackexchange.com/a/146770/306760
    shell_command = """date +'DIRECTOR: Starting server at %Y-%m-%d %H:%M:%S %Z'
# See docs/UMASK.md before touching this
umask "$1"
for path in /site/run.sh /site/private/run.sh /site/public/run.sh; do
    if [ -x "$path" ]; then
        term() {
            date +'DIRECTOR: Stopping server at %Y-%m-%d %H:%M:%S %Z'
            kill "$child"
        }
        trap term TERM

        "$path" &
        child="$!"

        while ! wait; do true; done
        exec date +'DIRECTOR: Stopped server at %Y-%m-%d %H:%M:%S %Z'
    fi
done
echo 'DIRECTOR: No run.sh file found -- if it exists, make sure it is set as executable'
exec sleep 2147483647"""

    service_name = get_director_service_name(site_id)

    # See docs/UMASK.md before touching this
    # The umask is handed to the script as "$1", in octal without the
    # "0o" prefix.
    umask_arg = oct(settings.SITE_UMASK)[2:]
    command = ["sh", "-c", shell_command, "sh", umask_arg]

    # CPU and memory limits come from the site's resource_limits entry.
    limits = site_data["resource_limits"]
    resources = Resources(
        cpu_limit=convert_cpu_limit(limits["cpus"]),
        mem_limit=convert_memory_limit(limits["mem_limit"]),
    )

    hosts = params.pop("extra_hosts")
    endpoint_spec = EndpointSpec(mode="vip", ports={})

    # Scale to zero replicas while the site is not being served.
    replicas = 1 if site_data["is_being_served"] else 0
    mode = ServiceMode(mode="replicated", replicas=replicas)

    restart_policy = RestartPolicy(
        condition="any", delay=5, max_attempts=5, window=0)

    # delay and monitor are given in nanoseconds (5 s = 5 * 10**9 ns)
    update_config = UpdateConfig(
        parallelism=1,
        order="stop-first",
        failure_action="rollback",
        max_failure_ratio=0,
        delay=int(5 * (10**9)),
        monitor=int(5 * (10**9)),
    )

    params.update({
        "name": service_name,
        "read_only": True,
        "command": command,
        "workdir": "/site/public",
        "networks": ["director-sites"],
        "resources": resources,
        "env": env,
        "log_driver": "json-file",
        # Keep minimal logs
        "log_driver_options": {
            "max-size": "500k",
            "max-file": "1",
        },
        "hosts": hosts,
        "stop_grace_period": 3,
        "endpoint_spec": endpoint_spec,
        "mode": mode,
        "restart_policy": restart_policy,
        "update_config": update_config,
    })

    return params
Example #11
0
        List: List of Services
    """
    return client.services.list(filters={"label": [stack_name]})
    # return sorted(client.services.list(filters={"label": [check_label]}), key=lambda x: get_created(x), reverse=True)


def get_nodes():
    """Fetch every node known to the swarm.

    Returns:
        List: whatever ``client.nodes.list()`` reports
    """
    swarm_nodes = client.nodes.list()
    return swarm_nodes


# Module-level `Resources` instance built with a memory limit of '1.5g'.
# NOTE(review): presumably handed to the services created below - confirm.
resources = Resources(mem_limit='1.5g')


def create_service(username):
    """Created a new server service

    Args:
        username (string): The username for the OP user

    Returns:
        Service: The mc server service
    """
    # global port_last

    # vol = {check_label+"_user_"+username: {'bind': '/server', 'mode': 'rw'}} if username != None else False
    env = [f"OP_USERNAME={username}"
Example #12
0
    def test_restart_retains_settings(self):
        """Verify that a forced service update keeps every service setting.

        A service is started with a broad set of non-default options, its
        spec is verified, the service is force-updated, and the very same
        verification is run again against the refreshed service.
        """
        test_network = self.create_network('pygen-swarm-test',
                                           driver='overlay')
        test_secret = self.create_secret('pygen-secret', 'TopSecret')

        test_service = self.start_service(
            image=os.environ.get('TEST_IMAGE', 'alpine'),
            command='sh',
            args=['-c', 'sleep 3600'],
            constraints=['node.role==manager'],
            container_labels={'pygen.container.label': 'label-on-container'},
            endpoint_spec={
                'Ports': [{
                    'Protocol': 'tcp',
                    'PublishedPort': 8080,
                    'TargetPort': 5000
                }]
            },
            env=['PYGEN_CONTAINER_ENV=env-on-container'],
            hostname='pygen-swarm-test-512',
            labels={'pygen.service.label': 'label-on-service'},
            mode={'Replicated': {
                'Replicas': 2
            }},
            mounts=['/var:/hostvar:ro'],
            networks=[test_network.id],
            resources=Resources(mem_limit=8128128),
            restart_policy=dict(condition='on-failure', delay=3),
            secrets=[
                SecretReference(secret_id=test_secret.id,
                                secret_name=test_secret.name)
            ],
            stop_grace_period=1,
            update_config=dict(parallelism=12, delay=7),
            # Must match the 'User' value asserted in verify_all() below
            user='root',
            workdir='/hostvar')

        self.wait_for_service_running(test_service)

        initial_service = self.api.services(desired_task_state='').matching(
            test_service.id).first_value

        def verify_all(service):
            """Assert that `service` still carries all settings set above."""
            self.assertIsNotNone(service)
            self.assertGreaterEqual(len(service.tasks), 2)
            self.assertEqual(
                service.image, test_service.attrs['Spec']['TaskTemplate']
                ['ContainerSpec']['Image'])
            self.assertEqual(service.name, test_service.name)
            self.assertEqual(
                len(service.raw.attrs['Spec']['EndpointSpec']['Ports']), 1)
            self.assertEqual(
                service.raw.attrs['Spec']['EndpointSpec']['Ports'][0]
                ['TargetPort'], 5000)
            self.assertEqual(
                service.raw.attrs['Spec']['EndpointSpec']['Ports'][0]
                ['PublishedPort'], 8080)
            self.assertEqual(service.raw.attrs['Spec']['Labels'],
                             {'pygen.service.label': 'label-on-service'})
            self.assertEqual(
                service.raw.attrs['Spec']['UpdateConfig']['Delay'], 7)
            self.assertEqual(
                service.raw.attrs['Spec']['UpdateConfig']['Parallelism'], 12)
            self.assertIn('Replicated', service.raw.attrs['Spec']['Mode'])
            self.assertEqual(
                service.raw.attrs['Spec']['Mode']['Replicated']['Replicas'], 2)
            # Networks are looked up on the TaskTemplate first and on the
            # top-level Spec as a fallback
            self.assertIn(
                test_network.id,
                (n.get('Target')
                 for n in service.raw.attrs['Spec']['TaskTemplate'].get(
                     'Networks', service.raw.attrs['Spec'].get('Networks', {}))
                 ))

            task_template = service.raw.attrs['Spec']['TaskTemplate']

            self.assertEqual(task_template['Placement']['Constraints'],
                             ['node.role==manager'])
            self.assertEqual(task_template['ContainerSpec']['Command'], ['sh'])
            self.assertEqual(task_template['ContainerSpec']['Args'],
                             ['-c', 'sleep 3600'])
            self.assertEqual(len(task_template['ContainerSpec']['Secrets']), 1)
            self.assertEqual(
                task_template['ContainerSpec']['Secrets'][0]['SecretID'],
                test_secret.id)
            self.assertEqual(
                task_template['ContainerSpec']['Secrets'][0]['SecretName'],
                test_secret.name)
            self.assertEqual(
                task_template['ContainerSpec']['Secrets'][0]['File']['Name'],
                test_secret.name)
            six.assertRegex(self, task_template['ContainerSpec']['Image'],
                            '^%s' % os.environ.get('TEST_IMAGE', 'alpine'))
            self.assertEqual(task_template['ContainerSpec']['Hostname'],
                             'pygen-swarm-test-512')
            self.assertEqual(task_template['ContainerSpec']['Labels'],
                             {'pygen.container.label': 'label-on-container'})
            self.assertEqual(task_template['ContainerSpec']['User'], 'root')
            self.assertEqual(task_template['ContainerSpec']['Env'],
                             ['PYGEN_CONTAINER_ENV=env-on-container'])
            self.assertEqual(len(task_template['ContainerSpec']['Mounts']), 1)
            self.assertEqual(
                task_template['ContainerSpec']['Mounts'][0]['Source'], '/var')
            self.assertEqual(
                task_template['ContainerSpec']['Mounts'][0]['Target'],
                '/hostvar')
            self.assertTrue(
                task_template['ContainerSpec']['Mounts'][0]['ReadOnly'])
            self.assertEqual(task_template['ContainerSpec']['StopGracePeriod'],
                             1)
            self.assertEqual(task_template['ContainerSpec']['Dir'], '/hostvar')
            # assertEqual, not assertTrue: the second argument of assertTrue
            # is only the failure message, so it would never compare values
            self.assertEqual(task_template['RestartPolicy']['Condition'],
                             'on-failure')
            self.assertEqual(task_template['RestartPolicy']['Delay'], 3)
            self.assertEqual(
                task_template['Resources']['Limits']['MemoryBytes'], 8128128)

        verify_all(initial_service)

        initial_service.raw.update(force_update=20)

        self._wait_for_tasks(test_service, 4)

        current_service = self.api.services(desired_task_state='').matching(
            test_service.id).first_value

        self.assertGreater(current_service.version, initial_service.version)
        self.assertNotEqual(set(t.id for t in current_service.tasks),
                            set(t.id for t in initial_service.tasks))

        verify_all(current_service)
Example #13
0
    def test_restart_service_retains_settings(self):
        """Check that a pygen-triggered restart leaves the service spec as is.

        The join command returned by ``init_swarm()`` is executed in a second
        dind container, a global service with many explicit settings is
        created, the ``pygen-build`` image is run with ``--restart target-svc``
        and the service spec from before and after the run is compared.
        """
        from docker.types import EndpointSpec, Resources, RestartPolicy, SecretReference, UpdateConfig

        def take_networks(spec):
            # Removes 'Networks' from both the TaskTemplate and the spec
            # itself; the top-level entry is preferred when present.
            from_template = spec['TaskTemplate'].pop('Networks', [])
            return spec.pop('Networks', from_template)

        swarm_join_cmd = self.init_swarm()

        with self.with_dind_container() as worker_dind:
            worker_client = self.dind_client(worker_dind)
            self.prepare_images('alpine', client=worker_client)

            worker_dind.exec_run(swarm_join_cmd)

            overlay_net = self.remote_client.networks.create(
                'pygen-net', driver='overlay')

            top_secret = self.remote_client.secrets.create(
                name='pygen-secret', data='TopSecret')
            top_secret.reload()

            service_settings = dict(
                name='target-svc',
                mode='global',
                command='sh -c "date +%s ; sleep 3600"',
                constraints=['node.hostname != non-existing-node'],
                container_labels={'container.label': 'testing'},
                dns_config={'Nameservers': ['8.8.8.8']},
                endpoint_spec=EndpointSpec(mode='vip', ports={14002: 1234}),
                env=['TEST_ENV_VAR=12345'],
                labels={'service.label': 'on-service'},
                mounts=['/tmp:/data/hosttmp:ro'],
                networks=[overlay_net.name],
                resources=Resources(mem_limit=24000000),
                restart_policy=RestartPolicy(
                    condition='any', delay=5, max_attempts=3),
                secrets=[
                    SecretReference(secret_id=top_secret.id,
                                    secret_name=top_secret.name),
                ],
                stop_grace_period=1,
                update_config=UpdateConfig(
                    parallelism=1, delay=1, monitor=7200000000),
                user='******',
                workdir='/data/hosttmp',
                tty=True,
            )
            service = self.remote_client.services.create(
                'alpine', **service_settings)

            self.wait_for_service_start(service, num_tasks=2)
            service.reload()
            spec_before = service.attrs['Spec']

            # One-shot pygen run that restarts the target service
            pygen_args = ' '.join(
                ['--template #ok', '--restart target-svc', '--one-shot'])
            self.remote_client.containers.run(
                'pygen-build',
                command=pygen_args,
                remove=True,
                volumes=['/var/run/docker.sock:/var/run/docker.sock:ro'])

            self.wait_for_service_start(service, num_tasks=4)

            service = self.remote_client.services.get(service.id)
            service.reload()
            spec_after = service.attrs['Spec']

            # ForceUpdate is dropped from both specs before comparing them
            for spec in (spec_before, spec_after):
                del spec['TaskTemplate']['ForceUpdate']

            networks_before = take_networks(spec_before)
            networks_after = take_networks(spec_after)

            self.maxDiff = None

            self.assertGreater(len(networks_after), 0)
            self.assertEqual(networks_after, networks_before)
            self.assertDictEqual(spec_after, spec_before)
Example #14
0
    def start(self):
        """Start the single-user server in a docker service.

        You can specify the params for the service through
        jupyterhub_config.py or using the user_options.

        When ``self.get_service()`` yields no service, a new one is assembled
        from the spawner-level settings, the user_options (only when
        ``self.use_user_options`` is set) and the selected image's entry in
        ``self.dockerimages``, and is then created through
        ``self.docker('create_service', ...)``. When a service already
        exists, its ``JPY_API_TOKEN`` env value is reused as ``self.api_token``.

        This is a generator: results of ``self.get_service()``,
        ``self.docker(...)``, mount ``create(...)`` calls and
        ``self.wait_for_running_tasks()`` are obtained via ``yield``.

        Returns:
            tuple: ``(self.service_name, self.service_port)``; the service
            name is used in place of an ip.

        Raises:
            Exception: if neither a container_spec nor user_options are
                available, the selected image is not in ``self.dockerimages``,
                the service owner may not launch the image, or a config
                entry is misconfigured.
        """
        self.log.info("User: {}, start spawn".format(self.user.__dict__))

        # https://github.com/jupyterhub/jupyterhub
        # /blob/master/jupyterhub/user.py#L202
        # By default jupyterhub calls the spawner passing user_options
        if self.use_user_options:
            user_options = self.user_options
        else:
            user_options = {}

        service = yield self.get_service()
        if service is None:
            # Validate state
            if hasattr(self, 'container_spec') \
                    and self.container_spec is not None:
                container_spec = dict(**self.container_spec)
            elif user_options == {}:
                self.log.error("User: {} is trying to create a service"
                               " without a container_spec".format(self.user))
                raise Exception("That notebook is missing a specification "
                                "to launch it, contact the admin to resolve "
                                "this issue")
            else:
                # No spawner-level spec: start empty and let user_options
                # provide it (previously this left container_spec unbound)
                container_spec = {}

            # Setup service
            container_spec.update(user_options.get('container_spec', {}))

            # Which image to spawn
            if self.use_user_options and 'user_selected_image' in user_options:
                uimage = user_options['user_selected_image']
                image_info = None
                for di in self.dockerimages:
                    if di['image'] == uimage:
                        image_info = copy.deepcopy(di)
                if image_info is None:
                    # uimage is the value compared against di['image'] above,
                    # so it is reported as is
                    err_msg = "User selected image: {} couldn't be found" \
                        .format(uimage)
                    self.log.error(err_msg)
                    raise Exception(err_msg)
            else:
                # Default image
                image_info = self.dockerimages[0]

            self.log.debug("Image info: {}".format(image_info))
            # Does that image have restricted access
            if 'access' in image_info:
                # Check for static or db users
                allowed = False
                if self.service_owner in image_info['access']:
                    allowed = True
                else:
                    # Otherwise 'access' is tried as a path to a file
                    # listing one allowed user per line
                    if os.path.exists(image_info['access']):
                        db_path = image_info['access']
                        try:
                            self.log.info("Checking db: {} for "
                                          "User: {}".format(
                                              db_path, self.service_owner))
                            with open(db_path, 'r') as db:
                                users = [
                                    user.rstrip('\n').rstrip('\r\n')
                                    for user in db
                                ]
                                if self.service_owner in users:
                                    allowed = True
                        except IOError as err:
                            self.log.error("User: {} tried to open db file {},"
                                           "Failed {}".format(
                                               self.service_owner, db_path,
                                               err))
                if not allowed:
                    self.log.error(
                        "User: {} tried to launch {} without access".format(
                            self.service_owner, image_info['image']))
                    raise Exception(
                        "You don't have permission to launch that image")

            self.log.debug("Container spec: {}".format(container_spec))

            # Setup mounts
            mounts = []
            # Global mounts
            if 'mounts' in container_spec:
                mounts.extend(container_spec['mounts'])
            container_spec['mounts'] = []

            # Image mounts
            if 'mounts' in image_info:
                mounts.extend(image_info['mounts'])

            for mount in mounts:
                if isinstance(mount, dict):
                    m = VolumeMounter(mount)
                    m = yield m.create(owner=self.service_owner)
                else:
                    # Expects a mount_class that supports 'create'
                    if hasattr(self.user, 'data'):
                        m = yield mount.create(self.user.data,
                                               owner=self.service_owner)
                    else:
                        m = yield mount.create(owner=self.service_owner)
                container_spec['mounts'].append(m)

            # Some envs are required by the single-user-image
            if 'env' in container_spec:
                # container_spec is only a shallow copy, so work on a copy of
                # the env dict to keep the configured one untouched
                container_spec['env'] = dict(container_spec['env'])
                container_spec['env'].update(self.get_env())
            else:
                container_spec['env'] = self.get_env()

            # Env of image
            if 'env' in image_info and isinstance(image_info['env'], dict):
                container_spec['env'].update(image_info['env'])

            # Dynamic update of env values
            # A value such as '{attr}' is replaced by the string attribute of
            # that name on the spawner, the user or user.data (last match wins)
            for env_key, env_value in container_spec['env'].items():
                stripped_value = env_value.lstrip('{').rstrip('}')
                if hasattr(self, stripped_value) \
                        and isinstance(getattr(self, stripped_value), str):
                    container_spec['env'][env_key] = getattr(
                        self, stripped_value)
                if hasattr(self.user, stripped_value) \
                        and isinstance(getattr(self.user, stripped_value), str):
                    container_spec['env'][env_key] = getattr(
                        self.user, stripped_value)
                if hasattr(self.user, 'data') \
                        and hasattr(self.user.data, stripped_value)\
                        and isinstance(getattr(self.user.data, stripped_value), str):
                    container_spec['env'][env_key] = getattr(
                        self.user.data, stripped_value)

            # Args of image
            if 'args' in image_info and isinstance(image_info['args'], list):
                container_spec.update({'args': image_info['args']})

            # Command of image, either a list or a plain string
            if isinstance(image_info.get('command'), (list, str)):
                container_spec.update({'command': image_info['command']})

            # Log mounts config
            self.log.debug("User: {} container_spec mounts: {}".format(
                self.user, container_spec['mounts']))

            # Global resource_spec
            resource_spec = {}
            if hasattr(self, 'resource_spec') \
                    and self.resource_spec is not None:
                # Copy so the user_options below never leak into the
                # spawner-level setting
                resource_spec = dict(self.resource_spec)
            resource_spec.update(user_options.get('resource_spec', {}))

            networks = None
            if hasattr(self, 'networks'):
                networks = self.networks
            if user_options.get('networks') is not None:
                networks = user_options.get('networks')

            # Global placement
            placement = None
            if hasattr(self, 'placement'):
                placement = self.placement
            if user_options.get('placement') is not None:
                placement = user_options.get('placement')

            # Image to spawn
            image = image_info['image']

            # Image resources
            if 'resource_spec' in image_info:
                resource_spec = image_info['resource_spec']

            # Placement of image
            if 'placement' in image_info:
                placement = image_info['placement']

            # Configs attached to image
            # NOTE(review): this appends to self.configs on every spawn -
            # confirm that accumulating entries across spawns is intended
            if 'configs' in image_info and isinstance(image_info['configs'],
                                                      list):
                for c in image_info['configs']:
                    if isinstance(c, dict):
                        self.configs.append(c)

            if self.configs:
                # Check that the supplied configs already exists
                current_configs = yield self.docker('configs')
                config_error_msg = "The server has a misconfigured config, " \
                    "please contact an administrator to resolve this"

                for c in self.configs:
                    if 'config_name' not in c:
                        self.log.error("Config: {} does not have a "
                                       "required config_name key".format(c))
                        raise Exception(config_error_msg)
                    if 'config_id' not in c:
                        # Find the id from the supplied name
                        config_ids = [
                            cc['ID'] for cc in current_configs
                            if cc['Spec']['Name'] == c['config_name']
                        ]
                        if not config_ids:
                            self.log.error(
                                "A config with name {} could not be found"
                                .format(c['config_name']))
                            raise Exception(config_error_msg)
                        c['config_id'] = config_ids[0]

                container_spec.update(
                    {'configs': [ConfigReference(**c) for c in self.configs]})

            # Global container user
            uid_gid = None
            if 'uid_gid' in container_spec:
                uid_gid = copy.deepcopy(container_spec['uid_gid'])
                del container_spec['uid_gid']

            # Image user
            if 'uid_gid' in image_info:
                uid_gid = image_info['uid_gid']

            self.log.info("gid info {}".format(uid_gid))
            if isinstance(uid_gid, str):
                if ":" in uid_gid:
                    uid, gid = uid_gid.split(":")
                else:
                    uid, gid = uid_gid, None

                # '{uid}' / '{gid}' placeholders are filled from the user
                if uid == '{uid}' and hasattr(self.user, 'uid') \
                        and self.user.uid is not None:
                    uid = self.user.uid

                if gid is not None and gid == '{gid}' \
                        and hasattr(self.user, 'gid') \
                        and self.user.gid is not None:
                    gid = self.user.gid

                if uid:
                    container_spec.update({'user': str(uid)})
                if uid and gid:
                    container_spec.update({'user': str(uid) + ":" + str(gid)})

            # Global container user
            if 'user' in container_spec:
                container_spec['user'] = str(container_spec['user'])

            # Image user
            if 'user' in image_info:
                container_spec.update({'user': str(image_info['user'])})

            dynamic_holders = [Spawner, self, self.user]
            if hasattr(self.user, 'data'):
                dynamic_holders.append(self.user.data)

            # Expand container_spec before start
            for construct in dynamic_holders:
                try:
                    if not hasattr(construct, '__dict__'):
                        continue
                    recursive_format(container_spec, construct.__dict__)
                except TypeError:
                    # Best effort: a holder that cannot be used for
                    # formatting is skipped
                    pass

            # Create the service
            container_spec = ContainerSpec(image, **container_spec)
            resources = Resources(**resource_spec)
            # Placement is optional; unpacking None would raise a TypeError
            if placement is not None:
                placement = Placement(**placement)

            task_spec = {
                'container_spec': container_spec,
                'resources': resources,
                'placement': placement
            }

            task_tmpl = TaskTemplate(**task_spec)
            self.log.info("task temp: {}".format(task_tmpl))
            resp = yield self.docker('create_service',
                                     task_tmpl,
                                     name=self.service_name,
                                     networks=networks)
            self.service_id = resp['ID']
            self.log.info("Created Docker service {} (id: {}) from image {}"
                          " for user {}".format(self.service_name,
                                                self.service_id[:7], image,
                                                self.user))

            yield self.wait_for_running_tasks()

        else:
            self.log.info("Found existing Docker service '{}' (id: {})".format(
                self.service_name, self.service_id[:7]))
            # Handle re-using API token.
            # Get the API token from the environment variables
            # of the running service:
            envs = service['Spec']['TaskTemplate']['ContainerSpec']['Env']
            for line in envs:
                if line.startswith('JPY_API_TOKEN='):
                    self.api_token = line.split('=', 1)[1]
                    break

        ip = self.service_name
        port = self.service_port
        self.log.debug("Active service: '{}' with user '{}'".format(
            self.service_name, self.user))

        # we use service_name instead of ip
        # https://docs.docker.com/engine/swarm/networking/#use-swarm-mode-service-discovery
        # service_port is actually equal to 8888
        return ip, port