class KubeSpawner(Spawner):
    """
    Implement a JupyterHub spawner to spawn pods in a Kubernetes Cluster.
    """

    # We want to have one threadpool executor that is shared across all spawner objects
    # This is initialized by the first spawner that is created
    executor = None

    # We also want only one pod reflector per application
    pod_reflector = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # By now, all the traitlets have been set, so we can use them to compute
        # other attributes

        # Lazily create the shared executor on the class, so every spawner
        # instance reuses the same thread pool for blocking k8s API calls.
        if self.__class__.executor is None:
            self.__class__.executor = ThreadPoolExecutor(
                max_workers=self.k8s_api_threadpool_workers)

        main_loop = IOLoop.current()

        def on_reflector_failure():
            # Without a working reflector we cannot track pod state, so stop
            # the Hub's event loop rather than keep serving stale data.
            self.log.critical("Pod reflector failed, halting Hub.")
            main_loop.stop()

        # This will start watching in __init__, so it'll start the first
        # time any spawner object is created. Not ideal but works!
        if self.__class__.pod_reflector is None:
            self.__class__.pod_reflector = PodReflector(
                parent=self, namespace=self.namespace,
                on_failure=on_reflector_failure)

        self.api = shared_client('CoreV1Api')

        self.pod_name = self._expand_user_properties(self.pod_name_template)
        self.pvc_name = self._expand_user_properties(self.pvc_name_template)
        if self.hub_connect_ip:
            # Rebuild the hub API URL so that only host:port are swapped for
            # the pod-reachable address; every other URL component is kept.
            scheme, netloc, path, params, query, fragment = urlparse(
                self.hub.api_url)
            netloc = '{ip}:{port}'.format(
                ip=self.hub_connect_ip,
                port=self.hub_connect_port,
            )
            self.accessible_hub_api_url = urlunparse(
                (scheme, netloc, path, params, query, fragment))
        else:
            self.accessible_hub_api_url = self.hub.api_url

        if self.port == 0:
            # Our default port is 8888
            self.port = 8888

    k8s_api_threadpool_workers = Integer(
        # Set this explicitly, since this is the default in Python 3.5+
        # but not in 3.4
        5 * multiprocessing.cpu_count(),
        config=True,
        help="""
        Number of threads in thread pool used to talk to the k8s API.

        Increase this if you are dealing with a very large number of users.

        Defaults to `5 * cpu_cores`, which is the default for `ThreadPoolExecutor`.
        """
    )

    namespace = Unicode(
        config=True,
        help="""
        Kubernetes namespace to spawn user pods in.

        If running inside a kubernetes cluster with service accounts enabled,
        defaults to the current namespace. If not, defaults to `default`
        """
    )

    def _namespace_default(self):
        """
        Set namespace default to current namespace if running in a k8s cluster

        If not in a k8s cluster with service accounts enabled, default to
        `default`
        """
        # Kubernetes mounts the pod's own namespace at this well-known path
        # when service accounts are enabled.
        ns_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace'
        if os.path.exists(ns_path):
            with open(ns_path) as f:
                return f.read().strip()
        return 'default'

    ip = Unicode(
        '0.0.0.0',
        help="""
        The IP address (or hostname) the single-user server should listen on.

        We override this from the parent so we can set a more sane default for the
        Kubernetes setup.
        """
    ).tag(config=True)

    cmd = Command(
        None,
        allow_none=True,
        minlen=0,
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script
        command. Extra arguments, other than this path, should be provided via `args`.

        This is usually set if you want to start the single-user server in a different
        python environment (with virtualenv/conda) than JupyterHub itself.

        Some spawners allow shell-style expansion here, allowing you to use
        environment variables. Most, including the default, do not. Consult the
        documentation for your spawner to verify!

        If set to `None`, Kubernetes will start the `CMD` that is specified in the
        Docker image being started.
        """
    ).tag(config=True)

    singleuser_working_dir = Unicode(
        None,
        allow_none=True,
        help="""
        The working directory were the Notebook server will be started inside the container.
        Defaults to `None` so the working directory will be the one defined in the Dockerfile.
        """
    ).tag(config=True)

    singleuser_service_account = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        The service account to be mounted in the spawned user pod.

        When set to `None` (the default), no service account is mounted, and the
        default service account is explicitly disabled.

        This `serviceaccount` must already exist in the namespace the user pod is
        being spawned in.

        WARNING: Be careful with this configuration! Make sure the service account
        being mounted has the minimal permissions needed, and nothing more. When
        misconfigured, this can easily give arbitrary users root over your entire
        cluster.
        """
    )

    pod_name_template = Unicode(
        'jupyter-{username}{servername}',
        config=True,
        help="""
        Template to use to form the name of user's pods.

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pods are being spawned in,
        so if you are running multiple jupyterhubs spawning in the same namespace,
        consider setting this to be something more unique.
        """
    )

    user_storage_pvc_ensure = Bool(
        False,
        config=True,
        help="""
        Ensure that a PVC exists for each user before spawning.

        Set to true to create a PVC named with `pvc_name_template` if it does
        not exist for the user when their pod is spawning.
        """
    )

    pvc_name_template = Unicode(
        'claim-{username}{servername}',
        config=True,
        help="""
        Template to use to form the name of user's pvc.

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pvc are being spawned in,
        so if you are running multiple jupyterhubs spawning in the same namespace,
        consider setting this to be something more unique.
        """
    )

    hub_connect_ip = Unicode(
        None,
        config=True,
        allow_none=True,
        help="""
        IP/DNS hostname to be used by pods to reach out to the hub API.

        Defaults to `None`, in which case the `hub_ip` config is used.

        In kubernetes contexts, this is often not the same as `hub_ip`, since
        the hub runs in a pod which is fronted by a service. This IP should be
        something that pods can access to reach the hub process. This can also be
        through the proxy - API access is authenticated with a token that is passed
        only to the hub, so security is fine.

        Usually set to the service IP / DNS name of the service that fronts the
        hub pod (deployment/replicationcontroller/replicaset)

        Used together with `hub_connect_port` configuration.
        """
    )

    hub_connect_port = Integer(
        config=True,
        help="""
        Port to use by pods to reach out to the hub API.

        Defaults to be the same as `hub_port`.

        In kubernetes contexts, this is often not the same as `hub_port`, since
        the hub runs in a pod which is fronted by a service. This allows easy
        port mapping, and some systems take advantage of it.

        This should be set to the `port` attribute of a service that is fronting
        the hub pod.
        """
    )

    def _hub_connect_port_default(self):
        """
        Set default port on which pods connect to hub to be the hub port

        The hub needs to be accessible to the pods at this port. We default to
        the port the hub is listening on. This would be overriden in case some
        amount of port mapping is happening.
        """
        return self.hub.server.port

    singleuser_extra_labels = Dict(
        {},
        config=True,
        help="""
        Extra kubernetes labels to set on the spawned single-user pods.

        The keys and values specified here would be set as labels on the spawned
        single-user kubernetes pods. The keys and values must both be strings that
        match the kubernetes label key / value constraints.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        for more info on what labels are and why you might want to use them!

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """
    )

    singleuser_extra_annotations = Dict(
        {},
        config=True,
        help="""
        Extra kubernetes annotations to set on the spawned single-user pods.

        The keys and values specified here are added as annotations on the spawned
        single-user kubernetes pods. The keys and values must both be strings.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
        for more info on what annotations are and why you might want to use them!

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """
    )

    singleuser_image_spec = Unicode(
        'jupyterhub/singleuser:latest',
        config=True,
        help="""
        Docker image spec to use for spawning user's containers.

        Defaults to `jupyterhub/singleuser:latest`

        Name of the container + a tag, same as would be used with a `docker pull`
        command. If tag is set to `latest`, kubernetes will check the registry
        each time a new user is spawned to see if there is a newer image available.
        If available, new image will be pulled. Note that this could cause long
        delays when spawning, especially if the image is large. If you do not
        specify a tag, whatever version of the image is first pulled on the node
        will be used, thus possibly leading to inconsistent images on different
        nodes. For all these reasons, it is recommended to specify a specific
        immutable tag for the imagespec.

        If your image is very large, you might need to increase the timeout for
        starting the single user container from the default. You can set this
        with::

           c.KubeSpawner.start_timeout = 60 * 5  # Upto 5 minutes
        """
    )

    singleuser_image_pull_policy = Unicode(
        'IfNotPresent',
        config=True,
        help="""
        The image pull policy of the docker container specified in
        `singleuser_image_spec`.

        Defaults to `IfNotPresent` which causes the Kubelet to NOT pull the image
        specified in singleuser_image_spec if it already exists, except if the tag
        is `:latest`. For more information on image pull policy, refer to
        https://kubernetes.io/docs/concepts/containers/images/

        This configuration is primarily used in development if you are actively
        changing the `singleuser_image_spec` and would like to pull the image
        whenever a user container is spawned.
        """
    )

    singleuser_image_pull_secrets = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        The kubernetes secret to use for pulling images from private repository.

        Set this to the name of a Kubernetes secret containing the docker
        configuration required to pull the image specified in
        `singleuser_image_spec`.

        https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod
        has more information on when and why this might need to be set, and what
        it should be set to.
        """
    )

    singleuser_node_selector = Dict(
        {},
        config=True,
        help="""
        The dictionary Selector labels used to match the Nodes where Pods will be launched.

        Default is None and means it will be launched in any available Node.

        For example to match the Nodes that have a label of `disktype: ssd` use::

           {"disktype": "ssd"}
        """
    )

    singleuser_uid = Union(
        [Integer(), Callable()],
        allow_none=True,
        config=True,
        help="""
        The UID to run the single-user server containers as.

        This UID should ideally map to a user that already exists in the container
        image being used. Running as root is discouraged.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable
        will be called asynchronously if it returns a future. Note that the
        interface of the spawner class is not deemed stable across versions, so
        using this functionality might cause your JupyterHub or kubespawner
        upgrades to break.

        If set to `None`, the user specified with the `USER` directive in the
        container metadata is used.
        """
    )

    singleuser_fs_gid = Union(
        [Integer(), Callable()],
        allow_none=True,
        config=True,
        help="""
        The GID of the group that should own any volumes that are created & mounted.

        A special supplemental group that applies primarily to the volumes mounted
        in the single-user server. In volumes from supported providers, the
        following things happen:

        1. The owning GID will be the this GID
        2. The setgid bit is set (new files created in the volume will be owned by
           this GID)
        3. The permission bits are OR’d with rw-rw

        The single-user server will also be run with this gid as part of its
        supplemental groups.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable
        will be called asynchronously if it returns a future, rather than an int.
        Note that the interface of the spawner class is not deemed stable across
        versions, so using this functionality might cause your JupyterHub or
        kubespawner upgrades to break.

        You'll *have* to set this if you are using auto-provisioned volumes with
        most cloud providers. See `fsGroup
        <https://kubernetes.io/docs/api-reference/v1.9/#podsecuritycontext-v1-core>`_
        for more details.
        """
    )

    singleuser_supplemental_gids = Union(
        [List(), Callable()],
        allow_none=True,
        config=True,
        help="""
        A list of GIDs that should be set as additional supplemental groups to
        the user that the container runs as.

        Instead of a list of integers, this could also be a callable that takes as
        one parameter the current spawner instance and returns a list of integers.
        The callable will be called asynchronously if it returns a future, rather
        than a list. Note that the interface of the spawner class is not deemed
        stable across versions, so using this functionality might cause your
        JupyterHub or kubespawner upgrades to break.

        You may have to set this if you are deploying to an environment with
        RBAC/SCC enforced and pods run with a 'restricted' SCC which results in
        the image being run as an assigned user ID. The supplemental group IDs
        would need to include the corresponding group ID of the user ID the image
        normally would run as. The image must setup all directories/files any
        application needs access to, as group writable.
        """
    )

    singleuser_privileged = Bool(
        False,
        config=True,
        help="""
        Whether to run the pod with a privileged security context.
        """
    )

    modify_pod_hook = Callable(
        None,
        allow_none=True,
        config=True,
        help="""
        Callable to augment the Pod object before launching.

        Expects a callable that takes two parameters:

           1. The spawner object that is doing the spawning
           2. The Pod object that is to be launched

        You should modify the Pod object and return it.

        This can be a coroutine if necessary. When set to none, no augmenting is
        done.

        This is very useful if you want to modify the pod being launched
        dynamically. Note that the spawner object can change between versions of
        KubeSpawner and JupyterHub, so be careful relying on this!
        """
    )

    volumes = List(
        [],
        config=True,
        help="""
        List of Kubernetes Volume specifications that will be mounted in the user pod.

        This list will be directly added under `volumes` in the kubernetes pod
        spec, so you should use the same structure. Each item in the list must
        have the following two keys:

          - `name`
            Name that'll be later used in the `volume_mounts` config to mount this
            volume at a specific path.
          - `<name-of-a-supported-volume-type>` (such as `hostPath`,
            `persistentVolumeClaim`, etc)
            The key name determines the type of volume to mount, and the value
            should be an object specifying the various options available for that
            kind of volume.

        See https://kubernetes.io/docs/concepts/storage/volumes for more
        information on the various kinds of volumes available and their options.
        Your kubernetes cluster must already be configured to support the volume
        types you want to use.

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """
    )

    volume_mounts = List(
        [],
        config=True,
        help="""
        List of paths on which to mount volumes in the user notebook's pod.

        This list will be added to the values of the `volumeMounts` key under the
        user's container in the kubernetes pod spec, so you should use the same
        structure as that. Each item in the list should be a dictionary with at
        least these two keys:

           - `mountPath` The path on the container in which we want to mount the
             volume.
           - `name` The name of the volume we want to mount, as specified in the
             `volumes` config.

        See https://kubernetes.io/docs/concepts/storage/volumes for more
        information on how the `volumeMount` item works.

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """
    )

    user_storage_capacity = Unicode(
        None,
        config=True,
        allow_none=True,
        help="""
        The ammount of storage space to request from the volume that the pvc will
        mount to. This ammount will be the ammount of storage space the user has
        to work with on their notebook. If left blank, the kubespawner will not
        create a pvc for the pod.

        This will be added to the `resources: requests: storage:` in the k8s pod spec.

        See https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims
        for more information on how storage works.

        Quantities can be represented externally as unadorned integers, or as
        fixed-point integers with one of these SI suffices (`E, P, T, G, M, K, m`)
        or their power-of-two equivalents (`Ei, Pi, Ti, Gi, Mi, Ki`). For example,
        the following represent roughly the same value: `128974848`, `129e6`,
        `129M`, `123Mi`.
        (https://github.com/kubernetes/kubernetes/blob/master/docs/design/resources.md)
        """
    )

    user_storage_extra_labels = Dict(
        {},
        config=True,
        help="""
        Extra kubernetes labels to set on the user PVCs.

        The keys and values specified here would be set as labels on the PVCs
        created by kubespawner for the user. Note that these are only set when
        the PVC is created, not later when they are updated.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        for more info on what labels are and why you might want to use them!

        `{username}` and `{userid}` are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """
    )

    user_storage_class = Unicode(
        None,
        config=True,
        allow_none=True,
        help="""
        The storage class that the pvc will use. If left blank, the kubespawner
        will not create a pvc for the pod.

        This will be added to the
        `annotations: volume.beta.kubernetes.io/storage-class:` in the pvc metadata.

        This will determine what type of volume the pvc will request to use. If
        one exists that matches the criteria of the StorageClass, the pvc will
        mount to that. Otherwise, b/c it has a storage class, k8s will
        dynamically spawn a pv for the pvc to bind to and a machine in the
        cluster for the pv to bind to.

        See https://kubernetes.io/docs/concepts/storage/storage-classes/ for
        more information on how StorageClasses work.
        """
    )

    user_storage_access_modes = List(
        ["ReadWriteOnce"],
        config=True,
        help="""
        List of access modes the user has for the pvc.

        The access modes are:

            - `ReadWriteOnce` – the volume can be mounted as read-write by a single node
            - `ReadOnlyMany` – the volume can be mounted read-only by many nodes
            - `ReadWriteMany` – the volume can be mounted as read-write by many nodes

        See https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes
        for more information on how access modes work.
        """
    )

    singleuser_lifecycle_hooks = Dict(
        {},
        config=True,
        help="""
        Kubernetes lifecycle hooks to set on the spawned single-user pods.

        The keys is name of hooks and there are only two hooks, postStart and
        preStop. The values are handler of hook which executes by Kubernetes
        management system when hook is called.

        Below is an sample copied from `Kubernetes doc
        <https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/>`_ ::

            lifecycle:
              postStart:
                exec:
                  command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"]
              preStop:
                exec:
                  command: ["/usr/sbin/nginx","-s","quit"]

        See https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/
        for more info on what lifecycle hooks are and why you might want to use them!
        """
    )

    singleuser_init_containers = List(
        None,
        config=True,
        help="""
        List of initialization containers belonging to the pod.

        This list will be directly added under `initContainers` in the kubernetes
        pod spec, so you should use the same structure. Each item in the list is
        container configuration which follows spec at
        https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#container-v1-core.

        One usage is disabling access to metadata service from single-user
        notebook server with configuration below:

        initContainers:

        .. code::yaml

            - name: init-iptables
              image: <image with iptables installed>
              command: ["iptables", "-A", "OUTPUT", "-p", "tcp", "--dport", "80", "-d", "169.254.169.254", "-j", "DROP"]
              securityContext:
                capabilities:
                  add:
                  - NET_ADMIN

        See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
        for more info on what init containers are and why you might want to use them!

        To user this feature, Kubernetes version must greater than 1.6.
        """
    )

    singleuser_extra_container_config = Dict(
        None,
        config=True,
        help="""
        Extra configuration (e.g. ``envFrom``) for notebook container which is
        not covered by other attributes.

        This dict will be directly merge into `container` of notebook server, so
        you should use the same structure. Each item in the dict is field of
        container configuration which follows spec at
        https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#container-v1-core.

        One usage is set ``envFrom`` on notebook container with configuration
        below:

        .. code::yaml

            envFrom: [
                {
                    configMapRef: {
                        name: special-config
                    }
                }
            ]

        The key could be either camelcase word (used by Kubernetes yaml, e.g.
        ``envFrom``) or underscore-separated word (used by kubernetes python
        client, e.g. ``env_from``).
        """
    )

    singleuser_extra_pod_config = Dict(
        None,
        config=True,
        help="""
        Extra configuration (e.g. tolerations) for the pod which is not covered
        by other attributes.

        This dict will be directly merge into pod,so you should use the same
        structure. Each item in the dict is field of pod configuration which
        follows spec at
        https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#podspec-v1-core.

        One usage is set dnsPolicy with configuration below::

            dnsPolicy: ClusterFirstWithHostNet

        The `key` could be either camelcase word (used by Kubernetes yaml, e.g.
        `dnsPolicy`) or underscore-separated word (used by kubernetes python
        client, e.g. `dns_policy`).
        """
    )

    singleuser_extra_containers = List(
        None,
        config=True,
        help="""
        List of containers belonging to the pod which besides to the container
        generated for notebook server.

        This list will be directly appended under `containers` in the kubernetes
        pod spec, so you should use the same structure. Each item in the list is
        container configuration which follows spec at
        https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#container-v1-core.

        One usage is setting crontab in a container to clean sensitive data with
        configuration below::

            [
                {
                    'name': 'crontab',
                    'image': 'supercronic',
                    'command': ['/usr/local/bin/supercronic', '/etc/crontab']
                }
            ]
        """
    )

    extra_resource_guarantees = Dict(
        {},
        config=True,
        help="""
        The dictionary used to request arbitrary resources.
        Default is None and means no additional resources are requested.
        For example, to request 3 Nvidia GPUs::

            {"nvidia.com/gpu": "3"}
        """
    )

    extra_resource_limits = Dict(
        {},
        config=True,
        help="""
        The dictionary used to limit arbitrary resources.
        Default is None and means no additional resources are limited.
        For example, to add a limit of 3 Nvidia GPUs::

            {"nvidia.com/gpu": "3"}
        """
    )

    delete_stopped_pods = Bool(
        True,
        config=True,
        help="""
        Whether to delete pods that have stopped themselves.
        Set to False to leave stopped pods in the completed state,
        allowing for easier debugging of why they may have stopped.
        """
    )

    def _expand_user_properties(self, template: str) -> str:
        """
        Expand `{userid}`, `{username}`, `{legacy_escape_username}` and
        `{servername}` placeholders in a name template.

        Both username forms are restricted to DNS-label safe characters.
        """
        # Make sure username and servername match the restrictions for DNS labels
        safe_chars = set(string.ascii_lowercase + string.digits)

        # Set servername based on whether named-server initialised
        if self.name:
            servername = '-{}'.format(self.name)
        else:
            servername = ''

        # Old-style escaping: every unsafe character becomes '-' (lossy).
        legacy_escaped_username = ''.join(
            [s if s in safe_chars else '-' for s in self.user.name.lower()])

        # New-style escaping via escapism is reversible.
        safe_username = escapism.escape(
            self.user.name, safe=safe_chars, escape_char='-').lower()

        return template.format(
            userid=self.user.id,
            username=safe_username,
            legacy_escape_username=legacy_escaped_username,
            servername=servername
        )

    def _expand_all(self, src):
        """
        Recursively expand user-property templates in every string found in
        `src` (which may be a string, list, or dict); other values pass through.
        """
        if isinstance(src, list):
            return [self._expand_all(i) for i in src]
        elif isinstance(src, dict):
            return {k: self._expand_all(v) for k, v in src.items()}
        elif isinstance(src, str):
            return self._expand_user_properties(src)
        else:
            return src

    def _build_common_labels(self, extra_labels):
        """Merge `extra_labels` over the base labels shared by pods and PVCs."""
        # Default set of labels, picked up from
        # https://github.com/kubernetes/helm/blob/master/docs/chart_best_practices/labels.md
        labels = {
            'heritage': 'jupyterhub',
            'app': 'jupyterhub',
        }
        labels.update(extra_labels)
        return labels

    def _build_pod_labels(self, extra_labels):
        """Build the full label set for a user pod, including reflector labels."""
        labels = {
            'component': 'singleuser-server'
        }
        labels.update(extra_labels)
        # Make sure pod_reflector.labels in final label list, so the pod is
        # picked up by the reflector's watch.
        labels.update(self.pod_reflector.labels)
        return self._build_common_labels(labels)

    def _build_common_annotations(self, extra_annotations):
        """Build annotations recording the (unescaped) user and server names."""
        # Annotations don't need to be escaped
        annotations = {
            'hub.jupyter.org/username': self.user.name
        }
        if self.name:
            annotations['hub.jupyter.org/servername'] = self.name

        annotations.update(extra_annotations)
        return annotations

    @gen.coroutine
    def get_pod_manifest(self):
        """
        Make a pod manifest that will spawn current user's notebook pod.
        """
        # uid / fs_gid / supplemental_gids may each be a plain value or a
        # (possibly async) callable taking this spawner; resolve them first.
        if callable(self.singleuser_uid):
            singleuser_uid = yield gen.maybe_future(self.singleuser_uid(self))
        else:
            singleuser_uid = self.singleuser_uid

        if callable(self.singleuser_fs_gid):
            singleuser_fs_gid = yield gen.maybe_future(self.singleuser_fs_gid(self))
        else:
            singleuser_fs_gid = self.singleuser_fs_gid

        if callable(self.singleuser_supplemental_gids):
            singleuser_supplemental_gids = yield gen.maybe_future(self.singleuser_supplemental_gids(self))
        else:
            singleuser_supplemental_gids = self.singleuser_supplemental_gids

        # With no cmd configured, leave real_cmd as None so the image's own
        # CMD is used.
        if self.cmd:
            real_cmd = self.cmd + self.get_args()
        else:
            real_cmd = None

        labels = self._build_pod_labels(self._expand_all(self.singleuser_extra_labels))
        annotations = self._build_common_annotations(self._expand_all(self.singleuser_extra_annotations))

        return make_pod(
            name=self.pod_name,
            cmd=real_cmd,
            port=self.port,
            image_spec=self.singleuser_image_spec,
            image_pull_policy=self.singleuser_image_pull_policy,
            image_pull_secret=self.singleuser_image_pull_secrets,
            node_selector=self.singleuser_node_selector,
            run_as_uid=singleuser_uid,
            fs_gid=singleuser_fs_gid,
            supplemental_gids=singleuser_supplemental_gids,
            run_privileged=self.singleuser_privileged,
            env=self.get_env(),
            volumes=self._expand_all(self.volumes),
            volume_mounts=self._expand_all(self.volume_mounts),
            working_dir=self.singleuser_working_dir,
            labels=labels,
            annotations=annotations,
            cpu_limit=self.cpu_limit,
            cpu_guarantee=self.cpu_guarantee,
            mem_limit=self.mem_limit,
            mem_guarantee=self.mem_guarantee,
            extra_resource_limits=self.extra_resource_limits,
            extra_resource_guarantees=self.extra_resource_guarantees,
            lifecycle_hooks=self.singleuser_lifecycle_hooks,
            init_containers=self.singleuser_init_containers,
            service_account=self.singleuser_service_account,
            extra_container_config=self.singleuser_extra_container_config,
            extra_pod_config=self.singleuser_extra_pod_config,
            extra_containers=self.singleuser_extra_containers
        )

    def get_pvc_manifest(self):
        """
        Make a pvc manifest that will spawn current user's pvc.
        """
        labels = self._build_common_labels(self._expand_all(self.user_storage_extra_labels))

        annotations = self._build_common_annotations({})

        return make_pvc(
            name=self.pvc_name,
            storage_class=self.user_storage_class,
            access_modes=self.user_storage_access_modes,
            storage=self.user_storage_capacity,
            labels=labels,
            annotations=annotations
        )

    def is_pod_running(self, pod):
        """
        Check if the given pod is running

        pod must be a dictionary representing a Pod kubernetes API object.
        """
        # FIXME: Validate if this is really the best way
        is_running = (
            pod is not None and
            pod.status.phase == 'Running' and
            pod.status.pod_ip is not None and
            pod.metadata.deletion_timestamp is None and
            all([cs.ready for cs in pod.status.container_statuses])
        )
        return is_running

    def get_state(self):
        """
        Save state required to reinstate this user's pod from scratch

        We save the `pod_name`, even though we could easily compute it,
        because JupyterHub requires you save *some* state! Otherwise
        it assumes your server is dead. This works around that.

        It's also useful for cases when the `pod_template` changes between
        restarts - this keeps the old pods around.
        """
        state = super().get_state()
        state['pod_name'] = self.pod_name
        return state

    def load_state(self, state):
        """
        Load state from storage required to reinstate this user's pod

        Since this runs after `__init__`, this will override the generated
        `pod_name` if there's one we have saved in state. These are the same
        in most cases, but if the `pod_template` has changed in between restarts,
        it will no longer be the case. This allows us to continue serving from
        the old pods with the old names.
        """
        if 'pod_name' in state:
            self.pod_name = state['pod_name']

    @gen.coroutine
    def poll(self):
        """
        Check if the pod is still running.

        Returns None if it is, and 1 if it isn't. These are the return values
        JupyterHub expects.
        """
        # have to wait for first load of data before we have a valid answer
        if not self.pod_reflector.first_load_future.done():
            yield self.pod_reflector.first_load_future
        data = self.pod_reflector.pods.get(self.pod_name, None)
        if data is not None:
            for c in data.status.container_statuses:
                # return exit code if notebook container has terminated
                if c.name == 'notebook':
                    if c.state.terminated:
                        # call self.stop to delete the pod
                        if self.delete_stopped_pods:
                            yield self.stop(now=True)
                        return c.state.terminated.exit_code
                    break
            # None means pod is running or starting up
            return None
        # pod doesn't exist or has been deleted
        return 1

    @run_on_executor
    def asynchronize(self, method, *args, **kwargs):
        # Run a blocking call (typically a kubernetes-client API method) on
        # the shared thread pool so it doesn't block the Hub's event loop.
        return method(*args, **kwargs)

    @gen.coroutine
    def start(self):
        """
        Start the user's pod (and optionally its PVC); returns (ip, port).
        """
        if self.user_storage_pvc_ensure:
            pvc = self.get_pvc_manifest()
            try:
                yield self.asynchronize(
                    self.api.create_namespaced_persistent_volume_claim,
                    namespace=self.namespace,
                    body=pvc
                )
            except ApiException as e:
                # 409 Conflict: the PVC already exists, which is fine - we
                # just reuse it. Anything else is a real error.
                if e.status == 409:
                    self.log.info("PVC " + self.pvc_name + " already exists, so did not create new pvc.")
                else:
                    raise

        # If we run into a 409 Conflict error, it means a pod with the
        # same name already exists. We stop it, wait for it to stop, and
        # try again. We try 4 times, and if it still fails we give up.
        # FIXME: Have better / cleaner retry logic!
        retry_times = 4
        pod = yield self.get_pod_manifest()
        if self.modify_pod_hook:
            pod = yield gen.maybe_future(self.modify_pod_hook(self, pod))
        for i in range(retry_times):
            try:
                yield self.asynchronize(
                    self.api.create_namespaced_pod,
                    self.namespace,
                    pod
                )
                break
            except ApiException as e:
                if e.status != 409:
                    # We only want to handle 409 conflict errors
                    self.log.exception("Failed for %s", pod.to_str())
                    raise
                self.log.info('Found existing pod %s, attempting to kill', self.pod_name)
                yield self.stop(True)
                self.log.info('Killed pod %s, will try starting singleuser pod again', self.pod_name)
        else:
            # for/else: the loop exhausted all retries without a `break`.
            raise Exception(
                'Can not create user pod %s already exists & could not be deleted' % self.pod_name)

        # Note: The self.start_timeout here is kinda superfluous, since
        # there is already a timeout on how long start can run for in
        # jupyterhub itself.
        yield exponential_backoff(
            lambda: self.is_pod_running(self.pod_reflector.pods.get(self.pod_name, None)),
            'pod/%s did not start in %s seconds!' % (self.pod_name, self.start_timeout),
            timeout=self.start_timeout
        )

        pod = self.pod_reflector.pods[self.pod_name]
        return (pod.status.pod_ip, self.port)

    @gen.coroutine
    def stop(self, now: bool = False):
        """
        Delete the user's pod; `now=True` skips the grace period.

        Blocks (on the event loop) until the reflector no longer sees the pod.
        """
        delete_options = client.V1DeleteOptions()

        if now:
            grace_seconds = 0
        else:
            # Give it some time, but not the default (which is 30s!)
            # FIXME: Move this into pod creation maybe?
            grace_seconds = 1

        delete_options.grace_period_seconds = grace_seconds
        self.log.info("Deleting pod %s", self.pod_name)
        yield self.asynchronize(
            self.api.delete_namespaced_pod,
            name=self.pod_name,
            namespace=self.namespace,
            body=delete_options,
            grace_period_seconds=grace_seconds
        )
        # Poll the reflector's cache until the pod disappears.
        # NOTE(review): this loop has no timeout - it relies on the caller's
        # own timeout; confirm that is acceptable for your deployment.
        while True:
            data = self.pod_reflector.pods.get(self.pod_name, None)
            if data is None:
                break
            yield gen.sleep(1)

    def _env_keep_default(self):
        # Don't leak any of the Hub process's environment into the pod.
        return []

    def get_args(self):
        """
        Return singleuser server args, rewriting --hub-api-url to the
        pod-reachable hub address.
        """
        args = super(KubeSpawner, self).get_args()

        # HACK: we wanna replace --hub-api-url=self.hub.api_url with
        # self.accessible_hub_api_url. This is required in situations where
        # the IP the hub is listening on (such as 0.0.0.0) is not the IP where
        # it can be reached by the pods (such as the service IP used for the hub!)
        # FIXME: Make this better?
        to_replace = '--hub-api-url="%s"' % (self.hub.api_url)
        for i in range(len(args)):
            if args[i] == to_replace:
                args[i] = '--hub-api-url="%s"' % (self.accessible_hub_api_url)
                break
        return args
class ConfigurableHTTPProxy(Proxy):
    """Proxy implementation for the default configurable-http-proxy.

    This is the default proxy implementation
    for running the nodejs proxy `configurable-http-proxy`.

    If the proxy should not be run as a subprocess of the Hub,
    (e.g. in a separate container),
    set::

        c.ConfigurableHTTPProxy.should_start = False
    """

    # Popen handle for the CHP subprocess (only set when the Hub starts it)
    proxy_process = Any()
    client = Instance(AsyncHTTPClient, ())

    concurrency = Integer(
        10,
        config=True,
        help="""
        The number of requests allowed to be concurrently outstanding to the proxy

        Limiting this number avoids potential timeout errors
        by sending too many requests to update the proxy at once
        """,
    )
    # Bounds concurrent proxy API requests; rebuilt when `concurrency` changes.
    semaphore = Any()

    @default('semaphore')
    def _default_semaphore(self):
        return asyncio.BoundedSemaphore(self.concurrency)

    @observe('concurrency')
    def _concurrency_changed(self, change):
        self.semaphore = asyncio.BoundedSemaphore(change.new)

    debug = Bool(False, help="Add debug-level logging to the Proxy.", config=True)
    auth_token = Unicode(
        help="""The Proxy auth token

        Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
        """
    ).tag(config=True)
    check_running_interval = Integer(
        5,
        help="Interval (in seconds) at which to check if the proxy is running.",
        config=True,
    )

    @default('auth_token')
    def _auth_token_default(self):
        token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', '')
        if self.should_start and not token:
            # generating tokens is fine if the Hub is starting the proxy
            self.log.info("Generating new CONFIGPROXY_AUTH_TOKEN")
            token = utils.new_token()
        return token

    api_url = Unicode(
        config=True, help="""The ip (or hostname) of the proxy's API endpoint"""
    )

    @default('api_url')
    def _api_url_default(self):
        url = '127.0.0.1:8001'
        proto = 'http'
        if self.app.internal_ssl:
            proto = 'https'
        return "{proto}://{url}".format(proto=proto, url=url)

    command = Command(
        'configurable-http-proxy',
        config=True,
        help="""The command to start the proxy""",
    )

    pid_file = Unicode(
        "jupyterhub-proxy.pid",
        config=True,
        help="File in which to write the PID of the proxy process.",
    )

    _check_running_callback = Any(
        help="PeriodicCallback to check if the proxy is running")

    def _check_pid(self, pid):
        """Raise ProcessLookupError unless `pid` looks like a live CHP process."""
        if os.name == 'nt':
            import psutil

            if not psutil.pid_exists(pid):
                raise ProcessLookupError

            try:
                process = psutil.Process(pid)
                if self.command and self.command[0]:
                    # Make sure the command line of the found process actually
                    # contains our proxy command, not some unrelated reused pid.
                    process_cmd = process.cmdline()
                    if process_cmd and not any(
                        self.command[0] in clause for clause in process_cmd
                    ):
                        raise ProcessLookupError
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                # If there is a process at the proxy's PID but we don't have permissions to see it,
                # then it is unlikely to actually be the proxy.
                raise ProcessLookupError
        else:
            # signal 0 performs the existence/permission check without killing
            os.kill(pid, 0)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # check for required token if proxy is external
        if not self.auth_token and not self.should_start:
            raise ValueError(
                "%s.auth_token or CONFIGPROXY_AUTH_TOKEN env is required"
                " if Proxy.should_start is False" % self.__class__.__name__)

    def _check_previous_process(self):
        """Check if there's a process leftover and shut it down if so"""
        if not self.pid_file or not os.path.exists(self.pid_file):
            return
        pid_file = os.path.abspath(self.pid_file)
        self.log.warning("Found proxy pid file: %s", pid_file)
        try:
            with open(pid_file, "r") as f:
                pid = int(f.read().strip())
        except ValueError:
            self.log.warning("%s did not appear to contain a pid", pid_file)
            self._remove_pid_file()
            return

        try:
            self._check_pid(pid)
        except ProcessLookupError:
            self.log.warning("Proxy no longer running at pid=%s", pid)
            self._remove_pid_file()
            return

        # if we got here, CHP is still running
        self.log.warning("Proxy still running at pid=%s", pid)
        if os.name != 'nt':
            # escalate: SIGTERM twice, then SIGKILL
            sig_list = [signal.SIGTERM] * 2 + [signal.SIGKILL]
        for i in range(3):
            try:
                if os.name == 'nt':
                    self._terminate_win(pid)
                else:
                    os.kill(pid, sig_list[i])
            except ProcessLookupError:
                break
            time.sleep(1)
            try:
                self._check_pid(pid)
            except ProcessLookupError:
                break

        try:
            self._check_pid(pid)
        except ProcessLookupError:
            self.log.warning("Stopped proxy at pid=%s", pid)
            self._remove_pid_file()
            return
        else:
            # BUG FIX: pid was previously passed as a second positional arg to
            # RuntimeError, so it never appeared in the formatted message.
            raise RuntimeError("Failed to stop proxy at pid=%s" % pid)

    def _write_pid_file(self):
        """write pid for proxy to a file"""
        self.log.debug("Writing proxy pid file: %s", self.pid_file)
        with open(self.pid_file, "w") as f:
            f.write(str(self.proxy_process.pid))

    def _remove_pid_file(self):
        """Cleanup pid file for proxy after stopping"""
        if not self.pid_file:
            return
        self.log.debug("Removing proxy pid file %s", self.pid_file)
        try:
            os.remove(self.pid_file)
        except FileNotFoundError:
            self.log.debug("PID file %s already removed", self.pid_file)

    def _get_ssl_options(self):
        """List of cmd proxy options to use internal SSL"""
        cmd = []
        proxy_api = 'proxy-api'
        proxy_client = 'proxy-client'
        api_key = self.app.internal_proxy_certs[proxy_api]['keyfile']
        api_cert = self.app.internal_proxy_certs[proxy_api]['certfile']
        api_ca = self.app.internal_trust_bundles[proxy_api + '-ca']

        client_key = self.app.internal_proxy_certs[proxy_client]['keyfile']
        client_cert = self.app.internal_proxy_certs[proxy_client]['certfile']
        client_ca = self.app.internal_trust_bundles[proxy_client + '-ca']

        cmd.extend(['--api-ssl-key', api_key])
        cmd.extend(['--api-ssl-cert', api_cert])
        cmd.extend(['--api-ssl-ca', api_ca])
        cmd.extend(['--api-ssl-request-cert'])
        cmd.extend(['--api-ssl-reject-unauthorized'])

        cmd.extend(['--client-ssl-key', client_key])
        cmd.extend(['--client-ssl-cert', client_cert])
        cmd.extend(['--client-ssl-ca', client_ca])
        cmd.extend(['--client-ssl-request-cert'])
        cmd.extend(['--client-ssl-reject-unauthorized'])
        return cmd

    async def start(self):
        """Start the proxy process"""
        # check if there is a previous instance still around
        self._check_previous_process()

        # build the command to launch
        public_server = Server.from_url(self.public_url)
        api_server = Server.from_url(self.api_url)
        env = os.environ.copy()
        env['CONFIGPROXY_AUTH_TOKEN'] = self.auth_token
        cmd = self.command + [
            '--ip', public_server.ip,
            '--port', str(public_server.port),
            '--api-ip', api_server.ip,
            '--api-port', str(api_server.port),
            '--error-target', url_path_join(self.hub.url, 'error'),
        ]
        if self.app.subdomain_host:
            cmd.append('--host-routing')
        if self.debug:
            cmd.extend(['--log-level', 'debug'])
        if self.ssl_key:
            cmd.extend(['--ssl-key', self.ssl_key])
        if self.ssl_cert:
            cmd.extend(['--ssl-cert', self.ssl_cert])
        if self.app.internal_ssl:
            cmd.extend(self._get_ssl_options())
        if self.app.statsd_host:
            cmd.extend([
                '--statsd-host', self.app.statsd_host,
                '--statsd-port', str(self.app.statsd_port),
                '--statsd-prefix', self.app.statsd_prefix + '.chp',
            ])
        # Warn if SSL is not used
        if ' --ssl' not in ' '.join(cmd):
            self.log.warning(
                "Running JupyterHub without SSL."
                " I hope there is SSL termination happening somewhere else..."
            )
        self.log.info("Starting proxy @ %s", public_server.bind_url)
        self.log.debug("Proxy cmd: %s", cmd)
        shell = os.name == 'nt'
        try:
            self.proxy_process = Popen(cmd,
                                       env=env,
                                       start_new_session=True,
                                       shell=shell)
        except FileNotFoundError:
            # BUG FIX: the message fragments were concatenated without any
            # separators, producing one run-on line; join them with newlines.
            self.log.error(
                "Failed to find proxy %r\n"
                "The proxy can be installed with `npm install -g configurable-http-proxy`.\n"
                "To install `npm`, install nodejs which includes `npm`.\n"
                "If you see an `EACCES` error or permissions error, refer to the `npm` "
                "documentation on How To Prevent Permissions Errors." % self.command)
            raise

        self._write_pid_file()

        def _check_process():
            # Raise if the subprocess has already exited.
            status = self.proxy_process.poll()
            if status is not None:
                e = RuntimeError("Proxy failed to start with exit code %i" % status)
                raise e from None

        # Wait for both endpoints (public + API) to come up, re-checking the
        # subprocess between attempts so a crash is reported promptly.
        for server in (public_server, api_server):
            for i in range(10):
                _check_process()
                try:
                    await server.wait_up(1)
                except TimeoutError:
                    continue
                else:
                    break
            await server.wait_up(1)
        _check_process()
        self.log.debug("Proxy started and appears to be up")
        pc = PeriodicCallback(self.check_running,
                              1e3 * self.check_running_interval)
        self._check_running_callback = pc
        pc.start()

    def _terminate_win(self, pid):
        # On Windows we spawned a shell on Popen, so we need to
        # terminate all child processes as well
        import psutil

        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
        for child in children:
            child.terminate()
        gone, alive = psutil.wait_procs(children, timeout=5)
        for p in alive:
            p.kill()
        # Clear the shell, too, if it still exists.
        try:
            parent.terminate()
            parent.wait(timeout=5)
            parent.kill()
        except psutil.NoSuchProcess:
            pass

    def _terminate(self):
        """Terminate our process"""
        if os.name == 'nt':
            self._terminate_win(self.proxy_process.pid)
        else:
            self.proxy_process.terminate()

    def stop(self):
        """Stop the proxy subprocess and its watchdog callback."""
        self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid)
        if self._check_running_callback is not None:
            self._check_running_callback.stop()
        if self.proxy_process.poll() is None:
            try:
                self._terminate()
            except Exception as e:
                self.log.error("Failed to terminate proxy process: %s", e)
        self._remove_pid_file()

    async def check_running(self):
        """Check if the proxy is still running"""
        if self.proxy_process.poll() is None:
            return
        self.log.error(
            "Proxy stopped with exit code %r",
            'unknown' if self.proxy_process is None
            else self.proxy_process.poll(),
        )
        self._remove_pid_file()
        await self.start()
        await self.restore_routes()

    def _routespec_to_chp_path(self, routespec):
        """Turn a routespec into a CHP API path

        For host-based routing, CHP uses the host as the first path segment.
        """
        path = self.validate_routespec(routespec)
        # CHP always wants to start with /
        if not path.startswith('/'):
            path = '/' + path
        # BUG: CHP doesn't seem to like trailing slashes on some endpoints (DELETE)
        if path != '/' and path.endswith('/'):
            path = path.rstrip('/')
        return path

    def _routespec_from_chp_path(self, chp_path):
        """Turn a CHP route into a route spec

        In the JSON API, CHP route keys are unescaped,
        so re-escape them to raw URLs and ensure slashes are in the right places.
        """
        # chp stores routes in unescaped form.
        # restore escaped-form we created it with.
        routespec = quote(chp_path, safe='@/~')
        if self.host_routing:
            # host routes don't start with /
            routespec = routespec.lstrip('/')
        # all routes should end with /
        if not routespec.endswith('/'):
            routespec = routespec + '/'
        return routespec

    async def api_request(self, path, method='GET', body=None, client=None):
        """Make an authenticated API request of the proxy."""
        client = client or AsyncHTTPClient()
        url = url_path_join(self.api_url, 'api/routes', path)

        if isinstance(body, dict):
            body = json.dumps(body)
        self.log.debug("Proxy: Fetching %s %s", method, url)
        req = HTTPRequest(
            url,
            method=method,
            headers={'Authorization': 'token {}'.format(self.auth_token)},
            body=body,
            connect_timeout=3,  # default: 20s
            request_timeout=10,  # default: 20s
        )

        async def _wait_for_api_request():
            try:
                async with self.semaphore:
                    return await client.fetch(req)
            except HTTPError as e:
                # Retry on potentially transient errors in CHP, typically
                # numbered 500 and up. Note that CHP isn't able to emit 429
                # errors.
                if e.code >= 500:
                    self.log.warning(
                        "api_request to the proxy failed with status code {}, retrying..."
                        .format(e.code))
                    return False  # a falsy return value make exponential_backoff retry
                else:
                    self.log.error(
                        "api_request to proxy failed: {0}".format(e))
                    # An unhandled error here will help the hub invoke cleanup logic
                    raise

        result = await exponential_backoff(
            _wait_for_api_request,
            'Repeated api_request to proxy path "{}" failed.'.format(path),
            timeout=30,
        )
        return result

    async def add_route(self, routespec, target, data):
        """Register `routespec` -> `target` with the proxy."""
        body = data or {}
        body['target'] = target
        body['jupyterhub'] = True
        path = self._routespec_to_chp_path(routespec)
        await self.api_request(path, method='POST', body=body)

    async def delete_route(self, routespec):
        """Remove `routespec` from the proxy; a 404 is treated as success."""
        path = self._routespec_to_chp_path(routespec)
        try:
            await self.api_request(path, method='DELETE')
        except HTTPError as e:
            if e.code == 404:
                # Warn about 404s because something might be wrong
                # but don't raise because the route is gone,
                # which is the goal.
                self.log.warning("Route %s already deleted", routespec)
            else:
                raise

    def _reformat_routespec(self, routespec, chp_data):
        """Reformat CHP data format to JupyterHub's proxy API."""
        target = chp_data.pop('target')
        chp_data.pop('jupyterhub')
        return {'routespec': routespec, 'target': target, 'data': chp_data}

    async def get_all_routes(self, client=None):
        """Fetch the proxy's routes."""
        proxy_poll_start_time = time.perf_counter()
        resp = await self.api_request('', client=client)
        chp_routes = json.loads(resp.body.decode('utf8', 'replace'))
        all_routes = {}
        for chp_path, chp_data in chp_routes.items():
            routespec = self._routespec_from_chp_path(chp_path)
            if 'jupyterhub' not in chp_data:
                # exclude routes not associated with JupyterHub
                self.log.debug("Omitting non-jupyterhub route %r", routespec)
                continue
            all_routes[routespec] = self._reformat_routespec(
                routespec, chp_data)
        PROXY_POLL_DURATION_SECONDS.observe(time.perf_counter() -
                                            proxy_poll_start_time)
        return all_routes
class ConfigurableHTTPProxy(Proxy):
    """Proxy implementation for the default configurable-http-proxy.

    This is the default proxy implementation
    for running the nodejs proxy `configurable-http-proxy`.

    If the proxy should not be run as a subprocess of the Hub,
    (e.g. in a separate container),
    set::

        c.ConfigurableHTTPProxy.should_start = False

    NOTE(review): this is a *second* definition of ``ConfigurableHTTPProxy``
    in this module; being later, it rebinds the name and shadows the earlier
    class. It is an older variant: no pid-file handling, no internal-SSL
    options, and no retry logic around proxy API requests.
    """

    # Popen handle for the CHP subprocess (only set when the Hub starts it)
    proxy_process = Any()
    client = Instance(AsyncHTTPClient, ())

    concurrency = Integer(
        10,
        config=True,
        help="""
        The number of requests allowed to be concurrently outstanding to the proxy

        Limiting this number avoids potential timeout errors
        by sending too many requests to update the proxy at once
        """,
    )
    # Bounds concurrent proxy API requests; rebuilt when `concurrency` changes.
    semaphore = Any()

    @default('semaphore')
    def _default_semaphore(self):
        return asyncio.BoundedSemaphore(self.concurrency)

    @observe('concurrency')
    def _concurrency_changed(self, change):
        self.semaphore = asyncio.BoundedSemaphore(change.new)

    debug = Bool(False, help="Add debug-level logging to the Proxy.", config=True)
    auth_token = Unicode(
        help="""The Proxy auth token

        Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
        """,
    ).tag(config=True)
    check_running_interval = Integer(5, config=True)

    @default('auth_token')
    def _auth_token_default(self):
        token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', '')
        if self.should_start and not token:
            # generating tokens is fine if the Hub is starting the proxy
            self.log.info("Generating new CONFIGPROXY_AUTH_TOKEN")
            token = utils.new_token()
        return token

    api_url = Unicode('http://127.0.0.1:8001', config=True,
                      help="""The ip (or hostname) of the proxy's API endpoint"""
                      )
    command = Command('configurable-http-proxy', config=True,
                      help="""The command to start the proxy"""
                      )

    _check_running_callback = Any(help="PeriodicCallback to check if the proxy is running")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # check for required token if proxy is external
        if not self.auth_token and not self.should_start:
            raise ValueError(
                "%s.auth_token or CONFIGPROXY_AUTH_TOKEN env is required"
                " if Proxy.should_start is False" % self.__class__.__name__
            )

    async def start(self):
        # Build the CHP command line and spawn it as a subprocess, then wait
        # for both the public and API endpoints to come up.
        public_server = Server.from_url(self.public_url)
        api_server = Server.from_url(self.api_url)
        env = os.environ.copy()
        env['CONFIGPROXY_AUTH_TOKEN'] = self.auth_token
        cmd = self.command + [
            '--ip', public_server.ip,
            '--port', str(public_server.port),
            '--api-ip', api_server.ip,
            '--api-port', str(api_server.port),
            '--error-target', url_path_join(self.hub.url, 'error'),
        ]
        if self.app.subdomain_host:
            cmd.append('--host-routing')
        if self.debug:
            cmd.extend(['--log-level', 'debug'])
        if self.ssl_key:
            cmd.extend(['--ssl-key', self.ssl_key])
        if self.ssl_cert:
            cmd.extend(['--ssl-cert', self.ssl_cert])
        if self.app.statsd_host:
            cmd.extend([
                '--statsd-host', self.app.statsd_host,
                '--statsd-port', str(self.app.statsd_port),
                '--statsd-prefix', self.app.statsd_prefix + '.chp'
            ])
        # Warn if SSL is not used
        if ' --ssl' not in ' '.join(cmd):
            self.log.warning("Running JupyterHub without SSL."
                             " I hope there is SSL termination happening somewhere else...")
        self.log.info("Starting proxy @ %s", public_server.bind_url)
        self.log.debug("Proxy cmd: %s", cmd)
        shell = os.name == 'nt'
        try:
            self.proxy_process = Popen(cmd, env=env, start_new_session=True,
                                       shell=shell)
        except FileNotFoundError as e:
            self.log.error(
                "Failed to find proxy %r\n"
                "The proxy can be installed with `npm install -g configurable-http-proxy`."
                "To install `npm`, install nodejs which includes `npm`."
                "If you see an `EACCES` error or permissions error, refer to the `npm` "
                "documentation on How To Prevent Permissions Errors." % self.command
            )
            raise

        def _check_process():
            # Raise if the subprocess has already exited.
            status = self.proxy_process.poll()
            if status is not None:
                e = RuntimeError(
                    "Proxy failed to start with exit code %i" % status)
                raise e from None

        for server in (public_server, api_server):
            for i in range(10):
                _check_process()
                try:
                    await server.wait_up(1)
                except TimeoutError:
                    continue
                else:
                    break
            await server.wait_up(1)
        _check_process()
        self.log.debug("Proxy started and appears to be up")
        pc = PeriodicCallback(self.check_running, 1e3 * self.check_running_interval)
        self._check_running_callback = pc
        pc.start()

    def _kill_proc_tree(self, pid):
        # Kill `pid` and every descendant process (used on Windows where the
        # proxy was launched through a shell).
        import psutil
        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
        for child in children:
            child.kill()
        psutil.wait_procs(children, timeout=5)

    def stop(self):
        # Stop the watchdog callback first, then terminate the subprocess.
        self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid)
        if self._check_running_callback is not None:
            self._check_running_callback.stop()
        if self.proxy_process.poll() is None:
            try:
                if os.name == 'nt':
                    # On Windows we spawned a shell on Popen, so we need to
                    # terminate all child processes as well
                    self._kill_proc_tree(self.proxy_process.pid)
                else:
                    self.proxy_process.terminate()
            except Exception as e:
                self.log.error("Failed to terminate proxy process: %s", e)

    async def check_running(self):
        """Check if the proxy is still running"""
        if self.proxy_process.poll() is None:
            return
        # Process exited: log it and restart the proxy, then restore routes.
        self.log.error("Proxy stopped with exit code %r",
                       'unknown' if self.proxy_process is None else self.proxy_process.poll()
                       )
        await self.start()
        await self.restore_routes()

    def _routespec_to_chp_path(self, routespec):
        """Turn a routespec into a CHP API path

        For host-based routing, CHP uses the host as the first path segment.
        """
        path = self.validate_routespec(routespec)
        # CHP always wants to start with /
        if not path.startswith('/'):
            path = '/' + path
        # BUG: CHP doesn't seem to like trailing slashes on some endpoints (DELETE)
        if path != '/' and path.endswith('/'):
            path = path.rstrip('/')
        return path

    def _routespec_from_chp_path(self, chp_path):
        """Turn a CHP route into a route spec

        In the JSON API, CHP route keys are unescaped,
        so re-escape them to raw URLs and ensure slashes are in the right places.
        """
        # chp stores routes in unescaped form.
        # restore escaped-form we created it with.
        routespec = quote(chp_path, safe='@/')
        if self.host_routing:
            # host routes don't start with /
            routespec = routespec.lstrip('/')
        # all routes should end with /
        if not routespec.endswith('/'):
            routespec = routespec + '/'
        return routespec

    async def api_request(self, path, method='GET', body=None, client=None):
        """Make an authenticated API request of the proxy."""
        client = client or AsyncHTTPClient()
        url = url_path_join(self.api_url, 'api/routes', path)

        if isinstance(body, dict):
            body = json.dumps(body)
        self.log.debug("Proxy: Fetching %s %s", method, url)
        req = HTTPRequest(url,
                          method=method,
                          headers={'Authorization': 'token {}'.format(
                              self.auth_token)},
                          body=body,
                          )
        # Limit concurrent outstanding requests via the semaphore.
        async with self.semaphore:
            result = await client.fetch(req)
            return result

    async def add_route(self, routespec, target, data):
        # Register `routespec` -> `target` with the proxy.
        body = data or {}
        body['target'] = target
        body['jupyterhub'] = True
        path = self._routespec_to_chp_path(routespec)
        await self.api_request(
            path,
            method='POST',
            body=body,
        )

    async def delete_route(self, routespec):
        # Remove `routespec` from the proxy; a 404 is treated as success.
        path = self._routespec_to_chp_path(routespec)
        try:
            await self.api_request(path, method='DELETE')
        except HTTPError as e:
            if e.code == 404:
                # Warn about 404s because something might be wrong
                # but don't raise because the route is gone,
                # which is the goal.
                self.log.warning("Route %s already deleted", routespec)
            else:
                raise

    def _reformat_routespec(self, routespec, chp_data):
        """Reformat CHP data format to JupyterHub's proxy API."""
        target = chp_data.pop('target')
        chp_data.pop('jupyterhub')
        return {
            'routespec': routespec,
            'target': target,
            'data': chp_data,
        }

    async def get_all_routes(self, client=None):
        """Fetch the proxy's routes."""
        resp = await self.api_request('', client=client)
        chp_routes = json.loads(resp.body.decode('utf8', 'replace'))
        all_routes = {}
        for chp_path, chp_data in chp_routes.items():
            routespec = self._routespec_from_chp_path(chp_path)
            if 'jupyterhub' not in chp_data:
                # exclude routes not associated with JupyterHub
                self.log.debug("Omitting non-jupyterhub route %r", routespec)
                continue
            all_routes[routespec] = self._reformat_routespec(
                routespec, chp_data)
        return all_routes
class VariableMixin(Configurable):
    """Spawner mixin that launches 'presentation' frameworks (Voila,
    Streamlit, Plotly Dash, Bokeh, R Shiny, ...) through
    jhsingle-native-proxy instead of a plain single-user server."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Merge extra_presentation_launchers config into a copy of
        # builtin_presentation_launchers. The merge is per-key so user config
        # can override just part of a builtin launcher (e.g. only its env).
        self.merged_presentation_launchers = deepcopy(
            self.builtin_presentation_launchers)
        for frameworkname, launcher in self.extra_presentation_launchers.items():
            self.merged_presentation_launchers.setdefault(
                frameworkname, {}).update(launcher)

    builtin_presentation_launchers = {
        'voila': {
            #'cmd': ['python3', '-m', 'jhsingle_native_proxy.main'], # This is the default cmd anyway
            'args': [
                '--destport=0', 'python3', '{-}m', 'voila',
                '{presentation_path}', '{--}port={port}', '{--}no-browser',
                '{--}Voila.base_url={base_url}/', '{--}Voila.server_url=/'
            ],
            'extra_args_fn': _get_voila_template
        },
        'streamlit': {
            'args': [
                '--destport=0', 'streamlit', 'run', '{presentation_path}',
                '{--}server.port={port}', '{--}server.headless=True',
                '{--}browser.serverAddress={origin_host}',
                '{--}browser.gatherUsageStats=false'
            ],
            'debug_args': [],  # The default is {--}debug, we don't want that
            # But --log_level=debug has to come earlier in the cmdline:
            'extra_args_fn': _get_streamlit_debug
        },
        'plotlydash': {
            'args': [
                '--destport=0', 'python3', '{-}m',
                'plotlydash_tornado_cmd.main', '{presentation_path}',
                '{--}port={port}'
            ],
            'env': {
                'DASH_REQUESTS_PATHNAME_PREFIX': '{base_url}/'
            }
        },
        'bokeh': {
            'args': [
                '--destport=0', 'python3', '{-}m', 'bokeh_root_cmd.main',
                '{presentation_path}', '{--}port={port}',
                '{--}allow-websocket-origin={origin_host}',
                '--ready-check-path=/ready-check'
            ]
        },
        'rshiny': {
            'args': [
                '--destport=0', 'python3', '{-}m', 'rshiny_server_cmd.main',
                '{presentation_path}', '{--}port={port}'
            ]
        }
    }

    extra_presentation_launchers = Dict({}, help="""
        Configuration dict containing details of any custom frameworks that should be made
        available to Dashboard creators.

        Any new keys added here also need to be added to the
        c.CDSDashboardsConfig.presentation_types list.

        See cdsdashboards/hubextension/spawners/variablemixin.py in the
        https://github.com/ideonate/cdsdashboards source code for details of the
        builtin_presentation_launchers dict which shows some examples. This
        extra_presentation_launchers config takes the same format.

        Any keys in extra_presentation_launchers that also belong to
        builtin_presentation_launchers will be merged into the builtin config, e.g.
        {'streamlit':{'env':{'STREAMLIT_ENV_VAR':'TEST'}}} will overwrite only the env
        section of the builtin streamlit launcher.
        """).tag(config=True)

    default_presentation_cmd = Command(
        ['python3', '-m', 'jhsingle_native_proxy.main'],
        allow_none=False,
        help="""
        The command to run presentations through jhsingle_native_proxy, can be a string or list.
        Default is ['python3', '-m', 'jhsingle_native_proxy.main']
        Change to e.g. ['start.sh', 'python3', '-m', 'jhsingle_native_proxy.main'] to ensure
        start hooks are run in the singleuser Docker images.
        """).tag(config=True)

    voila_template = Unicode(
        '',
        help="""
        --template argument to pass to Voila. Default is blank (empty string) to not pass any
        template to Voila command line.
        """,
    ).tag(config=True)

    proxy_request_timeout = Integer(
        0,
        help="""
        Request timeout in seconds that jhsingle-native-proxy should allow when proxying to the
        underlying process (e.g. Voila). The default of 0 means that no --request-timeout flag
        will be passed to jhsingle-native-proxy so it will use its own default.
        """,
    ).tag(config=True)

    proxy_force_alive = Bool(
        True,
        help="""
        Whether or not jhsingle-native-proxy should fake activity on its subprocess, always
        reporting to the hub that activity has happened. The default of True means that no flag
        will be passed to jhsingle-native-proxy so it will use its own default (expected to be
        --force-alive). If False is specified, --no-force-alive will be passed to
        jhsingle-native-proxy.
        """,
    ).tag(config=True)

    proxy_last_activity_interval = Integer(
        300,
        help="""
        Frequency in seconds that jhsingle-native-proxy should send any recent activity timestamp
        to the hub. If the default of 300 is specified, no --last-activity-interval flag will be
        passed to jhsingle-native-proxy so it will use its default. Otherwise the specified value
        will be passed to --last-activity-interval.
        """,
    ).tag(config=True)

    async def start(self):
        """
        Copy trait values from user_options into the trait attrs of the
        spawner object, select the launch cmd for the requested presentation
        framework, then delegate to the parent spawner's start().
        """
        if self.user_options:
            trait_names = set(self.trait_names()) - {'user_options'}
            for k in trait_names.intersection(self.user_options.keys()):
                merged_trait = self.user_options[k]
                # isinstance instead of `type(...) == dict`: also covers
                # dict subclasses and is the idiomatic type check.
                if isinstance(getattr(self, k, None), dict):
                    # Merge dicts if one already exists for this trait
                    merged_trait = {**getattr(self, k), **merged_trait}
                setattr(self, k, merged_trait)

        # Any update for cmd needs to be set here (args and env have their own overridden functions)
        presentation_type = self._get_presentation_type()

        if presentation_type != '':
            launcher = self.merged_presentation_launchers[presentation_type]
            if 'cmd' in launcher:
                self.cmd = launcher['cmd']
            else:
                self.cmd = self.default_presentation_cmd

        return await super().start()

    def get_args(self):
        """Return the arguments to be passed after self.cmd

        Doesn't expect shell expansion to happen.

        Also adds self.args at the end in case specified by the config.
        """
        presentation_type = self._get_presentation_type()

        if presentation_type == '':
            return super().get_args()

        launcher = self.merged_presentation_launchers[presentation_type]
        presentation_path = self.user_options.get('presentation_path', '')
        args = []

        # jhsingle-native-proxy --destport $destport --authtype oauth voila `pwd` {--}port={port} {--}no-browser {--}Voila.base_url={base_url}/ {--}Voila.server_url=/ --port $port

        notebook_dir = '.'
        if self.notebook_dir:
            notebook_dir = self.format_string(self.notebook_dir)

        git_repo = self.user_options.get('git_repo', '')
        repofolder = ''
        if git_repo != '':
            repofolder = self._calc_repo_folder(git_repo)
            args.append('--repo={}'.format(_quote_safe(git_repo)))
            notebook_dir = os.path.join(notebook_dir, repofolder)
            args.append('--repofolder={}'.format(_quote_safe(notebook_dir)))

        if presentation_path != '' and '..' not in presentation_path:
            # Should have been validated when dashboard created, but .. is particularly dangerous
            # Remove leading slash(es) to ensure it is relative to home folder
            presentation_path = re.sub('^/+', '', presentation_path)
            notebook_dir = os.path.join(notebook_dir, presentation_path)

        if 'args' in launcher:
            args.extend(launcher['args'])

        args.append('--presentation-path={}'.format(_quote_safe(notebook_dir)))

        conda_env = self.user_options.get('conda_env', '')
        if conda_env != '':
            args.append('--conda-env=%s' % _quote_safe(conda_env))

        if self.ip:
            args.append('--ip=%s' % _quote_safe(self.ip))

        if self.port:
            args.append('--port=%i' % self.port)

        if self.debug:
            if 'debug_args' in launcher:
                args.extend(launcher['debug_args'])
            else:
                args.append('{--}debug')
            args.append('--debug')  # For jhsingle-native-proxy itself

        proxy_request_timeout = getattr(self, 'proxy_request_timeout', 0)
        if proxy_request_timeout:
            args.append('--request-timeout={}'.format(proxy_request_timeout))

        proxy_force_alive = getattr(self, 'proxy_force_alive', True)
        if not proxy_force_alive:
            args.append('--no-force-alive')

        proxy_last_activity_interval = getattr(
            self, 'proxy_last_activity_interval', 300)
        if proxy_last_activity_interval != 300:
            args.append('--last-activity-interval={}'.format(
                proxy_last_activity_interval))

        args.extend(self.args)

        if 'extra_args_fn' in launcher and callable(launcher['extra_args_fn']):
            # Last chance for launcher config to change everything and anything
            args = launcher['extra_args_fn'](args, self)

        return args

    def _get_presentation_type(self):
        """
        Returns the presentation_type (e.g. '' for standard spawner, 'voila',
        'streamlit' for named presentation frameworks).

        Throws an exception if the presentation_type doesn't have a launcher
        configuration in either extra_presentation_launchers or
        builtin_presentation_launchers.
        """
        if self.user_options and 'presentation_type' in self.user_options:
            presentation_type = self.user_options['presentation_type']
            if presentation_type not in self.merged_presentation_launchers:
                raise Exception(
                    'presentation type {} has not been registered with the spawner'
                    .format(presentation_type))
            return presentation_type
        return ''

    def get_env(self):
        """Extend the parent env with any launcher-specific env vars,
        expanding placeholders like {base_url} and {presentation_path}."""
        env = super().get_env()
        presentation_type = self._get_presentation_type()
        if presentation_type != '':
            launcher = self.merged_presentation_launchers[presentation_type]
            if 'env' in launcher:
                presentation_dirname = '.'
                presentation_path = ''
                if self.user_options and 'presentation_path' in self.user_options:
                    presentation_path = self.user_options['presentation_path']
                    presentation_dirname = os.path.dirname(presentation_path)
                self.log.info(
                    'presentation_dirname: {}'.format(presentation_dirname))
                for k, v in launcher['env'].items():
                    env[k] = _fixed_format(
                        v,
                        base_url=self.server.base_url,
                        presentation_dirname=presentation_dirname,
                        presentation_path=presentation_path,
                        username=self.user.name)
        return env

    def _calc_repo_folder(self, git_repo):
        """Derive a filesystem-safe folder name from a git repo URL."""
        s = re.sub('^https?', '', git_repo.lower())  # Remove https and convert to lower case
        s = re.sub('[^a-z0-9]', '-', s)  # Replace any non-alphanumeric chars with dash
        # Remove dashes from start/end and reduce multiple dashes to just one dash
        s = re.sub('^-+|-+$|-(?=-)', '', s)
        return s

    def run_pre_spawn_hook(self):
        """Refuse to spawn if the permissions controller denies this user."""
        if not SpawnPermissionsController.get_instance(
                CDSConfigStore.get_instance(self.config),
                self.db).can_user_spawn(self.user.orm_user):
            raise Exception('User {} is not allowed to spawn a server'.format(
                self.user.name))
        return super().run_pre_spawn_hook()
class KubeSpawner(Spawner):
    """
    Implement a JupyterHub spawner to spawn pods in a Kubernetes Cluster.

    Talks to the Kubernetes REST API directly over an async tornado HTTP
    client (requests are built by ``request_maker()``), rather than via an
    official kubernetes client library.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # By now, all the traitlets have been set, so we can use them to compute
        # other attributes
        # Use curl HTTPClient if available, else fall back to Simple one
        try:
            from tornado.curl_httpclient import CurlAsyncHTTPClient
            self.httpclient = CurlAsyncHTTPClient(max_clients=64)
        except ImportError:
            from tornado.simple_httpclient import SimpleAsyncHTTPClient
            self.httpclient = SimpleAsyncHTTPClient(max_clients=64)
        # FIXME: Support more than just kubeconfig
        self.request = request_maker()
        self.pod_name = self._expand_user_properties(self.pod_name_template)
        self.pvc_name = self._expand_user_properties(self.pvc_name_template)
        if self.hub_connect_ip:
            # Rewrite the hub API URL so pods reach the hub via its service
            # address instead of whatever IP the hub happens to listen on.
            scheme, netloc, path, params, query, fragment = urlparse(
                self.hub.api_url)
            netloc = '{ip}:{port}'.format(
                ip=self.hub_connect_ip,
                port=self.hub_connect_port,
            )
            self.accessible_hub_api_url = urlunparse(
                (scheme, netloc, path, params, query, fragment))
        else:
            self.accessible_hub_api_url = self.hub.api_url
        if self.port == 0:
            # Our default port is 8888
            self.port = 8888

    namespace = Unicode(config=True, help="""
        Kubernetes namespace to spawn user pods in.

        If running inside a kubernetes cluster with service accounts enabled,
        defaults to the current namespace. If not, defaults to 'default'
        """)

    def _namespace_default(self):
        """
        Set namespace default to current namespace if running in a k8s cluster

        If not in a k8s cluster with service accounts enabled, default to 'default'
        """
        # This file is mounted into every pod that runs with a service account.
        ns_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace'
        if os.path.exists(ns_path):
            with open(ns_path) as f:
                return f.read().strip()
        return 'default'

    ip = Unicode('0.0.0.0', help="""
        The IP address (or hostname) the single-user server should listen on.

        We override this from the parent so we can set a more sane default for
        the Kubernetes setup.
        """).tag(config=True)

    cmd = Command(['jupyterhub-singleuser'], allow_none=True, minlen=0, help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script
        command. Extra arguments, other than this path, should be provided via `args`.

        This is usually set if you want to start the single-user server in a different
        python environment (with virtualenv/conda) than JupyterHub itself.

        Some spawners allow shell-style expansion here, allowing you to use
        environment variables. Most, including the default, do not. Consult the
        documentation for your spawner to verify!

        If set to None, Kubernetes will start the CMD that is specified in the
        Docker image being started.
        """).tag(config=True)

    pod_name_template = Unicode('jupyter-{username}-{userid}', config=True, help="""
        Template to use to form the name of user's pods.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pods are being spawned
        in, so if you are running multiple jupyterhubs spawning in the same
        namespace, consider setting this to be something more unique.
        """)

    pvc_name_template = Unicode('claim-{username}-{userid}', config=True, help="""
        Template to use to form the name of user's pvc.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pvc are being spawned
        in, so if you are running multiple jupyterhubs spawning in the same
        namespace, consider setting this to be something more unique.
        """)

    hub_connect_ip = Unicode(None, config=True, allow_none=True, help="""
        IP/DNS hostname to be used by pods to reach out to the hub API.

        Defaults to `None`, in which case the `hub_ip` config is used.

        In kubernetes contexts, this is often not the same as `hub_ip`, since
        the hub runs in a pod which is fronted by a service. This IP should be
        something that pods can access to reach the hub process. This can also be
        through the proxy - API access is authenticated with a token that is passed
        only to the hub, so security is fine.

        Usually set to the service IP / DNS name of the service that fronts the
        hub pod (deployment/replicationcontroller/replicaset)

        Used together with `hub_connect_port` configuration.
        """)

    hub_connect_port = Integer(config=True, help="""
        Port to use by pods to reach out to the hub API.

        Defaults to be the same as `hub_port`.

        In kubernetes contexts, this is often not the same as `hub_port`, since
        the hub runs in a pod which is fronted by a service. This allows easy port
        mapping, and some systems take advantage of it.

        This should be set to the `port` attribute of a service that is fronting
        the hub pod.
        """)

    def _hub_connect_port_default(self):
        """
        Set default port on which pods connect to hub to be the hub port

        The hub needs to be accessible to the pods at this port. We default to
        the port the hub is listening on. This would be overriden in case some
        amount of port mapping is happening.
        """
        return self.hub.server.port

    singleuser_extra_labels = Dict({}, config=True, help="""
        Extra kubernetes labels to set on the spawned single-user pods.

        The keys and values specified here would be set as labels on the spawned
        single-user kubernetes pods. The keys and values must both be strings
        that match the kubernetes label key / value constraints.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        for more info on what labels are and why you might want to use them!
        """)

    singleuser_image_spec = Unicode('jupyter/singleuser:latest', config=True, help="""
        Docker image spec to use for spawning user's containers.

        Defaults to `jupyter/singleuser:latest`

        Name of the container + a tag, same as would be used with
        a `docker pull` command. If tag is set to `latest`, kubernetes will
        check the registry each time a new user is spawned to see if there
        is a newer image available. If available, new image will be pulled.
        Note that this could cause long delays when spawning, especially
        if the image is large. If you do not specify a tag, whatever version
        of the image is first pulled on the node will be used, thus possibly
        leading to inconsistent images on different nodes. For all these
        reasons, it is recommended to specify a specific immutable tag
        for the imagespec.

        If your image is very large, you might need to increase the timeout
        for starting the single user container from the default. You can
        set this with:

        ```
        c.KubeSpawner.start_timeout = 60 * 5  # Upto 5 minutes
        ```
        """)

    singleuser_image_pull_policy = Unicode('IfNotPresent', config=True, help="""
        The image pull policy of the docker container specified in
        singleuser_image_spec.

        Defaults to `IfNotPresent` which causes the Kubelet to NOT pull the image
        specified in singleuser_image_spec if it already exists, except if the tag
        is :latest. For more information on image pull policy, refer to
        http://kubernetes.io/docs/user-guide/images/

        This configuration is primarily used in development if you are
        actively changing the singleuser_image_spec and would like to pull the image
        whenever a user container is spawned.
        """)

    singleuser_image_pull_secrets = Unicode(None, allow_none=True, config=True, help="""
        The kubernetes secret to use for pulling images from private repository.

        Set this to the name of a Kubernetes secret containing the docker configuration
        required to pull the image specified in singleuser_image_spec.

        https://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod
        has more information on when and why this might need to be set, and what it
        should be set to.
        """)

    singleuser_uid = Union([Integer(), Callable()], allow_none=True, config=True, help="""
        The UID to run the single-user server containers as.

        This UID should ideally map to a user that already exists in the container
        image being used. Running as root is discouraged.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable
        will be called asynchronously if it returns a future. Note that
        the interface of the spawner class is not deemed stable across versions,
        so using this functionality might cause your JupyterHub or kubespawner
        upgrades to break.

        If set to `None`, the user specified with the `USER` directive in the
        container metadata is used.
        """)

    singleuser_fs_gid = Union([Integer(), Callable()], allow_none=True, config=True, help="""
        The GID of the group that should own any volumes that are created & mounted.

        A special supplemental group that applies primarily to the volumes mounted
        in the single-user server. In volumes from supported providers, the following
        things happen:

          1. The owning GID will be the this GID
          2. The setgid bit is set (new files created in the volume will be owned by
             this GID)
          3. The permission bits are OR’d with rw-rw

        The single-user server will also be run with this gid as part of its
        supplemental groups.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable will
        be called asynchronously if it returns a future, rather than an int. Note that
        the interface of the spawner class is not deemed stable across versions,
        so using this functionality might cause your JupyterHub or kubespawner
        upgrades to break.

        You'll *have* to set this if you are using auto-provisioned volumes with most
        cloud providers. See
        [fsGroup](http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_podsecuritycontext)
        for more details.
        """)

    volumes = List([], config=True, help="""
        List of Kubernetes Volume specifications that will be mounted in the user pod.

        This list will be directly added under `volumes` in the kubernetes pod spec,
        so you should use the same structure. Each item in the list must have the
        following two keys:
          - name
            Name that'll be later used in the `volume_mounts` config to mount this
            volume at a specific path.
          - <name-of-a-supported-volume-type> (such as `hostPath`, `persistentVolumeClaim`,
            etc)
            The key name determines the type of volume to mount, and the value should
            be an object specifying the various options available for that kind of
            volume.

        See http://kubernetes.io/docs/user-guide/volumes/ for more information on the
        various kinds of volumes available and their options. Your kubernetes cluster
        must already be configured to support the volume types you want to use.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    volume_mounts = List([], config=True, help="""
        List of paths on which to mount volumes in the user notebook's pod.

        This list will be added to the values of the `volumeMounts` key under the user's
        container in the kubernetes pod spec, so you should use the same structure as that.
        Each item in the list should be a dictionary with at least these two keys:
          - mountPath
            The path on the container in which we want to mount the volume.
          - name
            The name of the volume we want to mount, as specified in the `volumes`
            config.

        See http://kubernetes.io/docs/user-guide/volumes/ for more information on how
        the volumeMount item works.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    user_storage_capacity = Unicode(None, config=True, allow_none=True, help="""
        The ammount of storage space to request from the volume that the pvc will
        mount to. This ammount will be the ammount of storage space the user has
        to work with on their notebook. If left blank, the kubespawner will not
        create a pvc for the pod.

        This will be added to the `resources: requests: storage:` in the k8s pod spec.

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims
        for more information on how storage works.

        Quantities can be represented externally as unadorned integers, or as fixed-point
        integers with one of these SI suffices (E, P, T, G, M, K, m) or their power-of-two
        equivalents (Ei, Pi, Ti, Gi, Mi, Ki). For example, the following represent roughly
        'the same value: 128974848, "129e6", "129M" , "123Mi".
        (https://github.com/kubernetes/kubernetes/blob/master/docs/design/resources.md)
        """)

    user_storage_class = Unicode(None, config=True, allow_none=True, help="""
        The storage class that the pvc will use. If left blank, the kubespawner will not
        create a pvc for the pod.

        This will be added to the `annotations: volume.beta.kubernetes.io/storage-class:`
        in the pvc metadata.

        This will determine what type of volume the pvc will request to use. If one exists
        that matches the criteria of the StorageClass, the pvc will mount to that. Otherwise,
        b/c it has a storage class, k8s will dynamicallly spawn a pv for the pvc to bind to
        and a machine in the cluster for the pv to bind to.

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses for
        more information on how StorageClasses work.
        """)

    user_storage_access_modes = List(["ReadWriteOnce"], config=True, help="""
        List of access modes the user has for the pvc.

        The access modes are:
        The access modes are:
            ReadWriteOnce – the volume can be mounted as read-write by a single node
            ReadOnlyMany – the volume can be mounted read-only by many nodes
            ReadWriteMany – the volume can be mounted as read-write by many nodes

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#access-modes for
        more information on how access modes work.
        """)

    def _expand_user_properties(self, template):
        """
        Expand {username}/{userid} in *template*, with the username made
        DNS-label safe (lowercased, unsafe chars replaced by '-').
        """
        # Make sure username matches the restrictions for DNS labels
        safe_chars = set(string.ascii_lowercase + string.digits)
        safe_username = ''.join(
            [s if s in safe_chars else '-' for s in self.user.name.lower()])
        return template.format(userid=self.user.id, username=safe_username)

    def _expand_all(self, src):
        """
        Recursively expand user properties in every string found in *src*
        (which may be a nested structure of lists/dicts/strings).
        """
        if isinstance(src, list):
            return [self._expand_all(i) for i in src]
        elif isinstance(src, dict):
            return {k: self._expand_all(v) for k, v in src.items()}
        elif isinstance(src, str):
            return self._expand_user_properties(src)
        else:
            return src

    @gen.coroutine
    def get_pod_manifest(self):
        """
        Make a pod manifest that will spawn current user's notebook pod.
        """
        # Add a hack to ensure that no service accounts are mounted in spawned pods
        # This makes sure that we don't accidentally give access to the whole
        # kubernetes API to the users in the spawned pods.
        # See https://github.com/kubernetes/kubernetes/issues/16779#issuecomment-157460294
        hack_volumes = [{'name': 'no-api-access-please', 'emptyDir': {}}]
        hack_volume_mounts = [{
            'name': 'no-api-access-please',
            'mountPath': '/var/run/secrets/kubernetes.io/serviceaccount',
            'readOnly': True
        }]

        # uid / fs_gid may be static values or (possibly async) callables.
        if callable(self.singleuser_uid):
            singleuser_uid = yield gen.maybe_future(self.singleuser_uid(self))
        else:
            singleuser_uid = self.singleuser_uid

        if callable(self.singleuser_fs_gid):
            singleuser_fs_gid = yield gen.maybe_future(
                self.singleuser_fs_gid(self))
        else:
            singleuser_fs_gid = self.singleuser_fs_gid

        if self.cmd:
            real_cmd = self.cmd + self.get_args()
        else:
            # None lets the image's own CMD run.
            real_cmd = None

        return make_pod_spec(
            self.pod_name,
            self.singleuser_image_spec,
            self.singleuser_image_pull_policy,
            self.singleuser_image_pull_secrets,
            self.port,
            real_cmd,
            singleuser_uid,
            singleuser_fs_gid,
            self.get_env(),
            self._expand_all(self.volumes) + hack_volumes,
            self._expand_all(self.volume_mounts) + hack_volume_mounts,
            self.singleuser_extra_labels,
            self.cpu_limit,
            self.cpu_guarantee,
            self.mem_limit,
            self.mem_guarantee,
        )

    def get_pvc_manifest(self):
        """
        Make a pvc manifest that will spawn current user's pvc.
        """
        return make_pvc_spec(self.pvc_name, self.user_storage_class,
                             self.user_storage_access_modes,
                             self.user_storage_capacity)

    @gen.coroutine
    def get_pod_info(self, pod_name):
        """
        Fetch info about a specific pod with the given pod name in current namespace

        Return `None` if pod with given name does not exist in current namespace
        """
        try:
            response = yield self.httpclient.fetch(
                self.request(k8s_url(
                    self.namespace,
                    'pods',
                    pod_name,
                )))
        except HTTPError as e:
            if e.code == 404:
                return None
            raise
        data = response.body.decode('utf-8')
        return json.loads(data)

    @gen.coroutine
    def get_pvc_info(self, pvc_name):
        """
        Fetch info about a specific pvc with the given pvc name in current namespace

        Return `None` if pvc with given name does not exist in current namespace
        """
        try:
            response = yield self.httpclient.fetch(
                self.request(
                    k8s_url(
                        self.namespace,
                        'persistentvolumeclaims',
                        pvc_name,
                    )))
        except HTTPError as e:
            if e.code == 404:
                return None
            raise
        data = response.body.decode('utf-8')
        return json.loads(data)

    def is_pod_running(self, pod):
        """
        Check if the given pod is running

        pod must be a dictionary representing a Pod kubernetes API object.
        """
        return pod['status']['phase'] == 'Running'

    def get_state(self):
        """
        Save state required to reinstate this user's pod from scratch

        We save the pod_name, even though we could easily compute it,
        because JupyterHub requires you save *some* state! Otherwise
        it assumes your server is dead. This works around that.

        It's also useful for cases when the pod_template changes between
        restarts - this keeps the old pods around.
        """
        state = super().get_state()
        state['pod_name'] = self.pod_name
        return state

    def load_state(self, state):
        """
        Load state from storage required to reinstate this user's pod

        Since this runs after __init__, this will override the generated pod_name
        if there's one we have saved in state. These are the same in most cases,
        but if the pod_template has changed in between restarts, it will no longer
        be the case. This allows us to continue serving from the old pods with
        the old names.
        """
        if 'pod_name' in state:
            self.pod_name = state['pod_name']

    @gen.coroutine
    def poll(self):
        """
        Check if the pod is still running.

        Returns None if it is, and 1 if it isn't. These are the return values
        JupyterHub expects.
        """
        data = yield self.get_pod_info(self.pod_name)
        if data is not None and self.is_pod_running(data):
            return None
        return 1

    @gen.coroutine
    def start(self):
        """
        Create the user's PVC (if storage is configured) and pod, then wait
        until the pod is Running and return its (ip, port).
        """
        if self.user_storage_class is not None and self.user_storage_capacity is not None:
            pvc_manifest = self.get_pvc_manifest()
            try:
                yield self.httpclient.fetch(
                    self.request(url=k8s_url(self.namespace,
                                             'persistentvolumeclaims'),
                                 body=json.dumps(pvc_manifest),
                                 method='POST',
                                 headers={'Content-Type': 'application/json'}))
            # NOTE(review): bare except assumes any failure means the PVC
            # already exists (409); it also swallows network/auth errors.
            # Should catch HTTPError and re-raise on codes other than 409.
            except:
                self.log.info("Pvc " + self.pvc_name +
                              " already exists, so did not create new pvc.")

        # If we run into a 409 Conflict error, it means a pod with the
        # same name already exists. We stop it, wait for it to stop, and
        # try again. We try 4 times, and if it still fails we give up.
        # FIXME: Have better / cleaner retry logic!
        retry_times = 4
        pod_manifest = yield self.get_pod_manifest()
        for i in range(retry_times):
            try:
                yield self.httpclient.fetch(
                    self.request(url=k8s_url(self.namespace, 'pods'),
                                 body=json.dumps(pod_manifest),
                                 method='POST',
                                 headers={'Content-Type': 'application/json'}))
                break
            except HTTPError as e:
                if e.code != 409:
                    # We only want to handle 409 conflict errors
                    self.log.exception("Failed for %s", json.dumps(pod_manifest))
                    raise
                self.log.info('Found existing pod %s, attempting to kill',
                              self.pod_name)
                yield self.stop(True)
                self.log.info(
                    'Killed pod %s, will try starting singleuser pod again',
                    self.pod_name)
        else:
            # for/else: only reached when every retry hit a 409.
            raise Exception(
                'Can not create user pod %s already exists & could not be deleted'
                % self.pod_name)

        # Poll until the pod reports phase == Running.
        while True:
            data = yield self.get_pod_info(self.pod_name)
            if data is not None and self.is_pod_running(data):
                break
            yield gen.sleep(1)
        return (data['status']['podIP'], self.port)

    @gen.coroutine
    def stop(self, now=False):
        """
        Delete the user's pod; when *now* is False, block until it is gone.
        """
        body = {
            'kind': "DeleteOptions",
            'apiVersion': 'v1',
            'gracePeriodSeconds': 0
        }
        yield self.httpclient.fetch(
            self.request(
                url=k8s_url(self.namespace, 'pods', self.pod_name),
                method='DELETE',
                body=json.dumps(body),
                headers={'Content-Type': 'application/json'},
                # Tornado's client thinks DELETE requests shouldn't have a body
                # which is a bogus restriction
                allow_nonstandard_methods=True,
            ))
        if not now:
            # If now is true, just return immediately, do not wait for
            # shut down to complete
            while True:
                data = yield self.get_pod_info(self.pod_name)
                if data is None:
                    break
                yield gen.sleep(1)

    def _env_keep_default(self):
        # Don't inherit any env vars from the hub process into the pod.
        return []

    def get_args(self):
        """
        Rewrite the parent args so --hub-api-url points at the address pods
        can actually reach (see accessible_hub_api_url in __init__).
        """
        args = super(KubeSpawner, self).get_args()

        # HACK: we wanna replace --hub-api-url=self.hub.api_url with
        # self.accessible_hub_api_url. This is required in situations where
        # the IP the hub is listening on (such as 0.0.0.0) is not the IP where
        # it can be reached by the pods (such as the service IP used for the hub!)
        # FIXME: Make this better?
        to_replace = '--hub-api-url="%s"' % (self.hub.api_url)
        for i in range(len(args)):
            if args[i] == to_replace:
                args[i] = '--hub-api-url="%s"' % (self.accessible_hub_api_url)
                break
        return args

    def get_env(self):
        # HACK: This is deprecated, and should be removed soon.
        # We set these to be compatible with DockerSpawner and earlier KubeSpawner
        env = super(KubeSpawner, self).get_env()
        env.update({
            'JPY_USER': self.user.name,
            'JPY_COOKIE_NAME': self.user.server.cookie_name,
            'JPY_BASE_URL': self.user.server.base_url,
            'JPY_HUB_PREFIX': self.hub.server.base_url,
            'JPY_HUB_API_URL': self.accessible_hub_api_url
        })
        return env
class LDAPAuthenticator(Authenticator):
    """
    LDAP Authenticator for Jupyterhub

    Binds to the directory with a service account (bind_user_dn), searches for
    the authenticating user, checks group membership against allowed_groups,
    and finally rebinds as the user to verify the supplied password.
    """

    server_hosts = Union(
        [List(), Unicode()],
        config=True,
        help="""
        List of Names, IPs, or the complete URLs in the scheme://hostname:hostport
        format of the server (required).
        """
    )

    server_port = Int(
        allow_none=True,
        default_value=None,
        config=True,
        help="""
        The port where the LDAP server is listening. Typically 389, for a
        cleartext connection, and 636 for a secured connection (defaults to None).
        """
    )

    server_use_ssl = Bool(
        default_value=False,
        config=True,
        help="""
        Boolean specifying if the connection is on a secure port (defaults to False).
        """
    )

    server_connect_timeout = Int(
        allow_none=True,
        default_value=None,
        config=True,
        help="""
        Timeout in seconds permitted when establishing an ldap connection before
        raising an exception (defaults to None).
        """
    )

    server_receive_timeout = Int(
        allow_none=True,
        default_value=None,
        config=True,
        help="""
        Timeout in seconds permitted for responses from established ldap
        connections before raising an exception (defaults to None).
        """
    )

    server_pool_strategy = Unicode(
        default_value='FIRST',
        config=True,
        help="""
        Available Pool HA strategies (defaults to 'FIRST').

        FIRST: Gets the first server in the pool, if 'server_pool_active' is
            set to True gets the first available server.
        ROUND_ROBIN: Each time the connection is open the subsequent server in
            the pool is used. If 'server_pool_active' is set to True unavailable
            servers will be discarded.
        RANDOM: each time the connection is open a random server is chosen in the
            pool. If 'server_pool_active' is set to True unavailable servers
            will be discarded.
        """
    )

    server_pool_active = Union(
        [Bool(), Int()],
        default_value=True,
        config=True,
        help="""
        If True the ServerPool strategy will check for server availability. Set
        to Integer for maximum number of cycles to try before giving up
        (defaults to True).
        """
    )

    server_pool_exhaust = Union(
        [Bool(), Int()],
        default_value=False,
        config=True,
        help="""
        If True, any inactive servers will be removed from the pool. If set to
        an Integer, this will be the number of seconds an unreachable server is
        considered offline. When this timeout expires the server is reinserted
        in the pool and checked again for availability (defaults to False).
        """
    )

    bind_user_dn = Unicode(
        allow_none=True,
        default_value=None,
        config=True,
        help="""
        The account of the user to log in for simple bind (defaults to None).
        """
    )

    bind_user_password = Unicode(
        allow_none=True,
        default_value=None,
        config=True,
        help="""
        The password of the user for simple bind (defaults to None)
        """
    )

    user_search_base = Unicode(
        config=True,
        help="""
        The location in the Directory Information Tree where the user search
        will start.
        """
    )

    user_search_filter = Unicode(
        config=True,
        help="""
        LDAP search filter to validate that the authenticating user exists
        within the organization. Search filters containing '{username}' will
        have that value substituted with the username of the
        authenticating user.
        """
    )

    user_membership_attribute = Unicode(
        default_value='memberOf',
        config=True,
        help="""
        LDAP Attribute used to associate user group membership
        (defaults to 'memberOf').
        """
    )

    group_search_base = Unicode(
        config=True,
        help="""
        The location in the Directory Information Tree where the group search
        will start. Search string containing '{group}' will be substituted
        with entries taken from allow_nested_groups.
        """
    )

    group_search_filter = Unicode(
        config=True,
        help="""
        LDAP search filter to return members of groups defined in the
        allowed_groups parameter. Search filters containing '{group}' will
        have that value substituted with the group dns provided in the
        allowed_groups parameter.
        """
    )

    allowed_groups = Union(
        [Unicode(), List()],
        config=True,
        help="""
        List of LDAP group DNs that users must be a member of in order to be
        granted login.
        """
    )

    allow_nested_groups = Bool(
        default_value=False,
        config=True,
        help="""
        Boolean allowing for recursive search of members within nested groups of
        allowed_groups (defaults to False).
        """
    )

    username_pattern = Unicode(
        config=True,
        help="""
        Regular expression pattern that all valid usernames must match. If a
        username does not match the pattern specified here, authentication will
        not be attempted. If not set, allow any username (defaults to None).
        """
    )

    username_regex = Any(
        help="""
        Compiled regex kept in sync with `username_pattern`
        """
    )

    @observe('username_pattern')
    def _username_pattern_changed(self, change):
        # NOTE(review): missing `return` after the None assignment — when the
        # new pattern is empty this falls through and compiles '' (which
        # matches everything), making the first assignment dead code.
        if not change['new']:
            self.username_regex = None
        self.username_regex = re.compile(change['new'])

    create_user_home_dir = Bool(
        default_value=False,
        config=True,
        help="""
        If set to True, will attempt to create a user's home directory
        locally if that directory does not exist already.
        """
    )

    create_user_home_dir_cmd = Command(
        config=True,
        help="""
        Command to create a users home directory.
        """
    )

    @default('create_user_home_dir_cmd')
    def _default_create_user_home_dir_cmd(self):
        # mkhomedir_helper is a PAM utility available on most Linux distros;
        # on other platforms we fall back to a no-op placeholder.
        if sys.platform == 'linux':
            home_dir_cmd = ['mkhomedir_helper']
        else:
            self.log.debug("Not sure how to create a home directory on '%s' system",
                           sys.platform)
            home_dir_cmd = ['']
        return home_dir_cmd

    @gen.coroutine
    def add_user(self, user):
        """
        Ensure a local home directory exists for *user* before delegating to
        the parent add_user; raises KeyError when the directory is missing and
        creation is disabled.
        """
        username = user.name
        user_exists = yield gen.maybe_future(self.user_home_dir_exists(username))
        if not user_exists:
            if self.create_user_home_dir:
                yield gen.maybe_future(self.add_user_home_dir(username))
            else:
                raise KeyError("Domain user '%s' does not exists locally."
                               % username)
        yield gen.maybe_future(super().add_user(user))

    def user_home_dir_exists(self, username):
        """
        Verify user home directory exists
        """
        # pwd entry index 5 is pw_dir (the home directory path).
        user = pwd.getpwnam(username)
        home_dir = user[5]
        return bool(os.path.isdir(home_dir))

    def add_user_home_dir(self, username):
        """
        Creates user home directory
        """
        cmd = [arg.replace('USERNAME', username)
               for arg in self.create_user_home_dir_cmd] + [username]
        self.log.info("Creating '%s' user home directory using command '%s'",
                      username, ' '.join(map(pipes.quote, cmd)))
        create_dir = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        create_dir.wait()
        if create_dir.returncode:
            err = create_dir.stdout.read().decode('utf8', 'replace')
            raise RuntimeError("Failed to create system user %s: %s" % (username, err))

    def normalize_username(self, username):
        """
        Normalize username for ldap query

        modifications:
         - format to lowercase
         - escape filter characters (ldap3)
        """
        username = username.lower()
        username = escape_filter_chars(username)
        return username

    def validate_username(self, username):
        """
        Validate a normalized username

        Return True if username is valid, False otherwise.
        """
        if '/' in username:
            # / is not allowed in usernames
            return False
        if not username:
            # empty usernames are not allowed
            return False
        if not self.username_regex:
            return True
        return bool(self.username_regex.match(username))

    def validate_host(self, host):
        """
        Validate hostname

        Return True if host is valid, False otherwise.
        """
        # NOTE(review): the URL form only accepts exactly 3-digit ports
        # ([0-9]{3}), so e.g. ldap://host:1389 or :63 would be rejected.
        host_ip_regex = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$')
        host_name_regex = re.compile(r'^((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}$')
        host_url_regex = re.compile(r'^(ldaps?://)(((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}):([0-9]{3})$')
        if bool(host_ip_regex.match(host)):
            # using ipv4 address
            valid = True
        elif bool(host_name_regex.match(host)):
            # using a hostname address
            valid = True
        elif bool(host_url_regex.match(host)):
            # using host url address
            valid = True
        else:
            # unsupported host format
            valid = False
        return valid

    def create_ldap_server_pool_obj(self, ldap_servers=None):
        """
        Create ldap3 ServerPool Object
        """
        server_pool = ldap3.ServerPool(
            ldap_servers,
            pool_strategy=self.server_pool_strategy.upper(),
            active=self.server_pool_active,
            exhaust=self.server_pool_exhaust
        )
        return server_pool

    def create_ldap_server_obj(self, host):
        """
        Create ldap3 Server Object
        """
        server = ldap3.Server(
            host,
            port=self.server_port,
            use_ssl=self.server_use_ssl,
            connect_timeout=self.server_connect_timeout
        )
        return server

    def ldap_connection(self, server_pool, username, password):
        """
        Create ldaps Connection Object

        Returns None (after logging) when the bind fails.
        """
        try:
            conn = ldap3.Connection(
                server_pool,
                user=username,
                password=password,
                auto_bind=ldap3.AUTO_BIND_TLS_BEFORE_BIND,
                read_only=True,
                receive_timeout=self.server_receive_timeout)
        except ldap3.core.exceptions.LDAPBindError as exc:
            msg = '\n{exc_type}: {exc_msg}'.format(
                exc_type=exc.__class__.__name__,
                exc_msg=exc.args[0] if exc.args else '')
            self.log.error("Failed to connect to ldap: %s", msg)
            return None
        return conn

    def get_nested_groups(self, conn, group):
        """
        Recursively search group for nested memberships
        """
        nested_groups = list()
        conn.search(
            search_base=self.group_search_base,
            search_filter=self.group_search_filter.format(group=group),
            search_scope=ldap3.SUBTREE)
        if conn.response:
            for nested_group in conn.response:
                nested_groups.extend([nested_group['dn']])
                groups = self.get_nested_groups(conn, nested_group['dn'])
                nested_groups.extend(groups)
        # de-duplicate before returning
        nested_groups = list(set(nested_groups))
        return nested_groups

    @gen.coroutine
    def authenticate(self, handler, data):
        """
        JupyterHub authenticate hook: bind as the service account, locate the
        user, check allowed-group membership, then rebind as the user to
        verify the password. Returns the username on success, None otherwise.
        """
        # define vars
        username = data['username']
        password = data['password']
        server_pool = self.create_ldap_server_pool_obj()
        conn_servers = list()

        # validate credentials
        username = self.normalize_username(username)
        if not self.validate_username(username):
            self.log.error('Unsupported username supplied')
            return None
        if password is None or password.strip() == '':
            self.log.error('Empty password supplied')
            return None

        # cast server_hosts to list
        if isinstance(self.server_hosts, str):
            self.server_hosts = self.server_hosts.split()

        # validate hosts and populate server_pool object
        for host in self.server_hosts:
            host = host.strip().lower()
            if not self.validate_host(host):
                # NOTE(review): `break` aborts the whole loop, discarding all
                # hosts after the invalid one; the log message says "Removing
                # host", which suggests `continue` was intended.
                self.log.warning("Host '%s' not supplied in approved format. Removing host from Server Pool", host)
                break
            server = self.create_ldap_server_obj(host)
            server_pool.add(server)
            conn_servers.extend([host])

        # verify ldap connection object parameters are defined
        if len(server_pool.servers) < 1:
            self.log.error("No hosts provided. ldap connection requires at least 1 host to connect to.")
            return None

        if not self.bind_user_dn or self.bind_user_dn.strip() == '':
            self.log.error("'bind_user_dn' config value undefined. requried for ldap connection")
            return None

        if not self.bind_user_password or self.bind_user_password.strip() == '':
            self.log.error("'bind_user_password' config value undefined. requried for ldap connection")
            return None

        # verify ldap search object parameters are defined
        if not self.user_search_base or self.user_search_base.strip() == '':
            self.log.error("'user_search_base' config value undefined. requried for ldap search")
            return None

        if not self.user_search_filter or self.user_search_filter.strip() == '':
            self.log.error("'user_search_filter' config value undefined. requried for ldap search")
            return None

        # open ldap connection and authenticate
        self.log.debug("Attempting ldap connection to %s with user '%s'",
                       conn_servers, self.bind_user_dn)
        conn = self.ldap_connection(
            server_pool,
            self.bind_user_dn,
            self.bind_user_password)

        # proceed if connection has been established
        if not conn or not conn.bind():
            self.log.error(
                "Could not establish ldap connection to %s using '%s' and supplied bind_user_password.",
                conn_servers, self.bind_user_dn)
            return None
        else:
            self.log.debug(
                "Successfully established connection to %s with user '%s'",
                conn_servers, self.bind_user_dn)

        # compile list of permitted groups
        permitted_groups = copy.deepcopy(self.allowed_groups)
        if self.allow_nested_groups:
            for group in self.allowed_groups:
                nested_groups = self.get_nested_groups(conn, group)
                permitted_groups.extend(nested_groups)

        # format user search filter
        auth_user_search_filter = self.user_search_filter.format(
            username=username)

        # search for authenticating user in ldap
        self.log.debug("Attempting LDAP search using search_filter '%s'.", auth_user_search_filter)
        conn.search(
            search_base=self.user_search_base,
            search_filter=auth_user_search_filter,
            search_scope=ldap3.SUBTREE,
            attributes=self.user_membership_attribute,
            paged_size=2)  # page of 2 lets us detect ambiguous (multi-hit) searches

        # handle abnormal search results
        if not conn.response or 'attributes' not in conn.response[0].keys():
            self.log.error(
                "LDAP search '%s' found %i result(s).",
                auth_user_search_filter, len(conn.response))
            return None
        elif len(conn.response) > 1:
            self.log.error(
                "LDAP search '%s' found %i result(s). Please narrow search to 1 result.",
                auth_user_search_filter, len(conn.response))
            return None
        else:
            self.log.debug("LDAP search '%s' found %i result(s).",
                           auth_user_search_filter, len(conn.response))

            # copy response to var
            search_response = copy.deepcopy(conn.response[0])

            # get authenticating user's ldap attributes
            # NOTE(review): `.strip == ''` compares the bound method to '' and
            # is always False — should be `.strip() == ''`.
            if not search_response['dn'] or search_response['dn'].strip == '':
                self.log.error(
                    "Search results for user '%s' returned 'dn' attribute with undefined or null value.",
                    username)
                conn.unbind()
                return None
            else:
                self.log.debug(
                    "Search results for user '%s' returned 'dn' attribute as '%s'",
                    username, search_response['dn'])
                auth_user_dn = search_response['dn']

            if not search_response['attributes'][self.user_membership_attribute]:
                self.log.error(
                    "Search results for user '%s' returned '%s' attribute with undefned or null value.",
                    username, self.user_membership_attribute)
                conn.unbind()
                return None
            else:
                self.log.debug(
                    "Search results for user '%s' returned '%s' attribute as %s",
                    username, self.user_membership_attribute,
                    search_response['attributes'][self.user_membership_attribute])
                auth_user_memberships = search_response['attributes'][self.user_membership_attribute]

            # is authenticating user a member of permitted_groups
            allowed_memberships = list(set(auth_user_memberships).intersection(permitted_groups))
            if bool(allowed_memberships):
                self.log.debug(
                    "User '%s' found in the following allowed ldap groups %s. Proceeding with authentication.",
                    username, allowed_memberships)

                # rebind ldap connection with authenticating user, gather results, and close connection
                conn.rebind(
                    user=auth_user_dn,
                    password=password)
                auth_bound = copy.deepcopy(conn.bind())
                conn.unbind()
                if not auth_bound:
                    self.log.error(
                        "Could not establish ldap connection to %s using '%s' and supplied bind_user_password.",
                        conn_servers, self.bind_user_dn)
                    auth_response = None
                else:
                    self.log.info("User '%s' sucessfully authenticated against ldap server %r.",
                                  username, conn_servers)
                    auth_response = username
            else:
                self.log.error("User '%s' is not a member of any permitted groups %s",
                               username, permitted_groups)
                auth_response = None

            permitted_groups = None
            return auth_response
class H2OSpawner(Spawner):
    """
    A Spawner that uses `subprocess.Popen` to start single-user servers as
    local processes.

    Requires local UNIX users matching the authenticated users to exist.
    Does not work on Windows.

    This is the default spawner for JupyterHub.
    """

    interrupt_timeout = Integer(10,
        help="""
        Seconds to wait for single-user server process to halt after SIGINT.

        If the process has not exited cleanly after this many seconds, a SIGTERM is sent.
        """
    ).tag(config=True)

    term_timeout = Integer(5,
        help="""
        Seconds to wait for single-user server process to halt after SIGTERM.

        If the process does not exit cleanly after this many seconds of SIGTERM, a SIGKILL is sent.
        """
    ).tag(config=True)

    kill_timeout = Integer(5,
        help="""
        Seconds to wait for process to halt after SIGKILL before giving up.

        If the process does not exit cleanly after this many seconds of SIGKILL,
        it becomes a zombie process. The hub process will log a warning and then give up.
        """
    ).tag(config=True)

    popen_kwargs = Dict(
        help="""Extra keyword arguments to pass to Popen when spawning single-user servers.

        For example::

            popen_kwargs = dict(shell=True)

        """
    ).tag(config=True)

    # FIX: the help text below tells admins to set c.H2OSpawner.shell_cmd,
    # which only works if the trait is tagged config=True (was missing).
    shell_cmd = Command(minlen=0,
        help="""Specify a shell command to launch.

        The single-user command will be appended to this list,
        so it should end with `-c` (for bash) or equivalent.

        For example::

            c.H2OSpawner.shell_cmd = ['bash', '-l', '-c']

        to launch with a bash login shell, which would set up the user's own complete environment.

        .. warning::

            Using shell_cmd gives users control over PATH, etc.,
            which could change what the jupyterhub-singleuser launch command does.
            Only use this for trusted users.
        """
    ).tag(config=True)

    # Handle to the spawned subprocess; None until start() has been called
    # (or after a hub restart, when only the pid is restored).
    proc = Instance(Popen, allow_none=True,
        help="""
        The process representing the single-user server process spawned for current user.

        Is None if no process has been spawned yet.
        """)

    # 0 means "no process"; persisted via get_state()/load_state() so the
    # hub can re-attach to a running server after a restart.
    pid = Integer(0,
        help="""
        The process id (pid) of the single-user server process spawned for current user.
        """
    )

    h2ocmd = Command(['java', '-jar', '/Users/c2j/Projects/h2oai/h2o-3.16.0.2/h2o.jar'],
        allow_none=True,
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script command. Extra arguments,
        other than this path, should be provided via `args`.

        This is usually set if you want to start the single-user server in a different python
        environment (with virtualenv/conda) than JupyterHub itself.

        Some spawners allow shell-style expansion here, allowing you to use environment variables.
        Most, including the default, do not. Consult the documentation for your spawner to verify!
        """
    ).tag(config=True)

    def make_preexec_fn(self, name):
        """
        Return a function that can be used to set the user id of the spawned process to user with name `name`

        This function can be safely passed to `preexec_fn` of `Popen`
        """
        return set_user_setuid(name)

    def load_state(self, state):
        """Restore state about spawned single-user server after a hub restart.

        Local processes only need the process id.
        """
        super(H2OSpawner, self).load_state(state)
        if 'pid' in state:
            self.pid = state['pid']

    def get_state(self):
        """Save state that is needed to restore this spawner instance after a hub restore.

        Local processes only need the process id.
        """
        state = super(H2OSpawner, self).get_state()
        if self.pid:
            state['pid'] = self.pid
        return state

    def clear_state(self):
        """Clear stored state about this spawner (pid)"""
        super(H2OSpawner, self).clear_state()
        self.pid = 0

    def user_env(self, env):
        """Augment environment of spawned process with user specific env variables."""
        import pwd
        env['USER'] = self.user.name
        home = pwd.getpwnam(self.user.name).pw_dir
        shell = pwd.getpwnam(self.user.name).pw_shell
        # These will be empty if undefined,
        # in which case don't set the env:
        if home:
            env['HOME'] = home
        if shell:
            env['SHELL'] = shell
        return env

    def get_env(self):
        """Get the complete set of environment variables to be set in the spawned process."""
        env = super().get_env()
        env = self.user_env(env)
        return env

    def get_h2oargs(self):
        """Return the arguments to be passed after self.cmd

        Doesn't expect shell expansion to happen.
        """
        args = []

        if self.ip:
            # FIX: flag and value must be separate argv tokens; the previous
            # '-ip "%s"' form handed the child a single argument containing
            # literal quote characters, which H2O cannot parse.
            args.append('-ip')
            args.append('%s' % self.ip)

        if self.port:
            args.append('-port')
            args.append('%i' % self.port)
        elif self.server.port:
            self.log.warning("Setting port from user.server is deprecated as of JupyterHub 0.7.")
            # FIX: same single-token bug as -ip; split into two argv entries.
            args.append('-port')
            args.append('%i' % self.server.port)

        if self.notebook_dir:
            notebook_dir = self.format_string(self.notebook_dir)
            args.append('--notebook-dir="%s"' % notebook_dir)
        if self.default_url:
            default_url = self.format_string(self.default_url)
            args.append('--NotebookApp.default_url="%s"' % default_url)

        if self.debug:
            args.append('--debug')
        if self.disable_user_config:
            args.append('--disable-user-config')
        # H2O serves under a context path so the hub proxy can route to it.
        args.append('-context_path')
        args.append('/user/%s' % self.user.name)
        args.extend(self.args)
        return args

    @gen.coroutine
    def start(self):
        """Start the single-user server."""
        self.port = random_port()
        cmd = []
        env = self.get_env()

        cmd.extend(self.h2ocmd)
        cmd.extend(self.get_h2oargs())

        if self.shell_cmd:
            # using shell_cmd (e.g. bash -c),
            # add our cmd list as the last (single) argument:
            cmd = self.shell_cmd + [' '.join(pipes.quote(s) for s in cmd)]

        self.log.info("Spawning %s", ' '.join(pipes.quote(s) for s in cmd))

        popen_kwargs = dict(
            preexec_fn=self.make_preexec_fn(self.user.name),
            start_new_session=True,  # don't forward signals
        )
        popen_kwargs.update(self.popen_kwargs)
        # don't let user config override env
        popen_kwargs['env'] = env
        try:
            self.proc = Popen(cmd, **popen_kwargs)
        except PermissionError:
            # use which to get abspath
            script = shutil.which(cmd[0]) or cmd[0]
            self.log.error("Permission denied trying to run %r. Does %s have access to this file?",
                           script, self.user.name,
                           )
            raise

        self.pid = self.proc.pid

        if self.__class__ is not H2OSpawner:
            # subclasses may not pass through return value of super().start,
            # relying on deprecated 0.6 way of setting ip, port,
            # so keep a redundant copy here for now.
            # A deprecation warning will be shown if the subclass
            # does not return ip, port.
            if self.ip:
                self.server.ip = self.ip
            self.server.port = self.port

        return (self.ip or '127.0.0.1', self.port)

    @gen.coroutine
    def poll(self):
        """Poll the spawned process to see if it is still running.

        If the process is still running, we return None. If it is not running,
        we return the exit code of the process if we have access to it, or 0 otherwise.
        """
        # if we started the process, poll with Popen
        if self.proc is not None:
            status = self.proc.poll()
            if status is not None:
                # clear state if the process is done
                self.clear_state()
            return status

        # if we resumed from stored state,
        # we don't have the Popen handle anymore, so rely on self.pid
        if not self.pid:
            # no pid, not running
            self.clear_state()
            return 0

        # send signal 0 to check if PID exists
        # this doesn't work on Windows, but that's okay because we don't support Windows.
        alive = yield self._signal(0)
        if not alive:
            self.clear_state()
            return 0
        else:
            return None

    @gen.coroutine
    def _signal(self, sig):
        """Send given signal to a single-user server's process.

        Returns True if the process still exists, False otherwise.

        The hub process is assumed to have enough privileges to do this (e.g. root).
        """
        try:
            os.kill(self.pid, sig)
        except OSError as e:
            if e.errno == errno.ESRCH:
                return False  # process is gone
            else:
                raise
        return True  # process exists

    @gen.coroutine
    def stop(self, now=False):
        """Stop the single-user server process for the current user.

        If `now` is False (default), shutdown the server as gracefully as possible,
        e.g. starting with SIGINT, then SIGTERM, then SIGKILL.
        If `now` is True, terminate the server immediately.

        The coroutine should return when the process is no longer running.
        """
        if not now:
            status = yield self.poll()
            if status is not None:
                return
            self.log.debug("Interrupting %i", self.pid)
            yield self._signal(signal.SIGINT)
            yield self.wait_for_death(self.interrupt_timeout)

        # clean shutdown failed, use TERM
        status = yield self.poll()
        if status is not None:
            return
        self.log.debug("Terminating %i", self.pid)
        yield self._signal(signal.SIGTERM)
        yield self.wait_for_death(self.term_timeout)

        # TERM failed, use KILL
        status = yield self.poll()
        if status is not None:
            return
        self.log.debug("Killing %i", self.pid)
        yield self._signal(signal.SIGKILL)
        yield self.wait_for_death(self.kill_timeout)

        status = yield self.poll()
        if status is None:
            # it all failed, zombie process
            self.log.warning("Process %i never died", self.pid)
class SingularitySpawner(LocalProcessSpawner):
    """SingularitySpawner - extends the default LocalProcessSpawner to allow for:

    1) User-specification of a singularity image via the Spawner options form
    2) Spawning a Notebook server within a Singularity container
    """

    singularity_cmd = Command(['/usr/local/bin/singularity', 'exec'],
        help="""
        This is the singularity command that will be executed when starting the
        single-user server. The image path and notebook server args will be concatenated
        to the end of this command. This is a good place to specify any site-specific
        options that should be applied to all users, such as default mounts.
        """).tag(config=True)

    default_singularity_options = Unicode('--writable --fakeroot --no-home --no-privs',
        help="""
        This is the singularity command that will be executed when starting the
        single-user server. The image path and notebook server args will be concatenated
        to the end of this command. This is a good place to specify any site-specific
        options that should be applied to all users, such as default mounts.
        """).tag(config=True)

    notebook_cmd = Unicode('jupyterhub-singleuser',
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script command. Extra arguments,
        other than this path, should be provided via `args`.
        """).tag(config=True)

    default_image_path = Unicode('',
        help="""
        Absolute POSIX filepath to Singularity image that will be used to
        execute the notebook server spawn command, if another path is not
        specified by the user.
        """).tag(config=True)

    pull_from_url = Bool(False,
        help="""
        If set to True, the user should be presented with URI specification
        options, and the spawner should first pull a new image from the
        specified shub or docker URI prior to running the notebook command.
        In this configuration, the `user_image_path` will specify where the
        new container will be created.
        """).tag(config=True)

    default_image_url = Unicode('docker://jupyter/base-notebook',
        help="""
        Singularity Hub or Docker URI from which the notebook image will be
        pulled, if no other URI is specified by the user but the _pull_ option
        has been selected.
        """).tag(config=True)

    default_gateway_address = Unicode('',
        help="""
        Singularity Hub or Docker URI from which the notebook image will be
        pulled, if no other URI is specified by the user but the _pull_ option
        has been selected.
        """).tag(config=True)

    options_form = Unicode()

    form_template = Unicode("""
        <div class="checkbox">
            <label>
                <input id="pull-checkbox" type="checkbox" value="pull" name="pull_from_url">Pull from URL
            </label>
        </div>
        <div id="url-group" class="form-group" hidden>
            <label for="user_image_url">
                Specify the image URL to pull from:
            </label>
            <input class="form-control" name="user_image_url" value="{default_image_url}">
        </div>
        <div class="form-group">
            <label id="path-label" for="user_image_path">
                Specify the Singularity image to use (absolute filepath):
            </label>
            <input class="form-control" name="user_image_path" value="{default_image_path}" required autofocus>
            <label id="gateway-label" for="user_gateway_address">
                Gateway Url (optional):
            </label>
            <input class="form-control" name="user_gateway_address" value="{default_gateway_address}" autofocus>
        </div>
        """)

    def format_default_image_path(self):
        """Format the image path template string."""
        format_options = dict(username=self.user.escaped_name)
        default_image_path = self.default_image_path.format(**format_options)
        return default_image_path

    @default('options_form')
    def _options_form(self):
        """Render the options form."""
        default_image_path = self.format_default_image_path()
        format_options = dict(
            default_image_path=default_image_path,
            default_image_url=self.default_image_url,
            default_gateway_address=self.default_gateway_address)
        options_form = self.form_template.format(**format_options)
        return JS_SCRIPT + options_form

    def options_from_form(self, form_data):
        """Get data from options form input fields."""
        user_image_path = form_data.get('user_image_path', None)
        user_image_url = form_data.get('user_image_url', None)
        pull_from_url = form_data.get('pull_from_url', False)
        user_gateway_address = form_data.get('user_gateway_address', None)
        return dict(user_image_path=user_image_path,
                    user_image_url=user_image_url,
                    pull_from_url=pull_from_url,
                    user_gateway_address=user_gateway_address)

    def get_image_url(self):
        """Get image URL to pull image from user options or default.

        NOTE: form values arrive as lists, so the default is wrapped in a
        list to keep a uniform `url[0]` access pattern downstream.
        """
        default_image_url = self.default_image_url
        image_url = self.user_options.get('user_image_url', [default_image_url])
        return image_url

    def get_image_path(self):
        """Get image filepath specified in user options else default (as a list)."""
        default_image_path = self.format_default_image_path()
        image_path = self.user_options.get('user_image_path', [default_image_path])
        return image_path

    def get_gateway_address(self):
        """Get the gateway address specified in user options else default."""
        default_gateway_address = self.default_gateway_address
        user_gateway_address = self.user_options.get('user_gateway_address',
                                                     [default_gateway_address])
        user_gateway_address = user_gateway_address[0]
        return user_gateway_address

    @gen.coroutine
    def pull_image(self, image_url):
        """Build a sandbox singularity image at the configured image path.

        Returns the image path list so callers can confirm where the
        sandbox was created.
        """
        image_path = self.get_image_path()
        self.log.info("creating sandbox for notebook")
        try:
            build_cmd = sp.Popen([
                'singularity', 'build', '--force', '--fakeroot', '--sandbox',
                image_path[0], image_url[0]
            ], preexec_fn=set_user_setuid(self.user.name))
            (output, err) = build_cmd.communicate()
            self.log.debug(output)
        except Exception:
            # FIX: the previous bare `except:` logged an undefined name (`res`),
            # raising a NameError that masked the real build failure.
            self.log.exception(
                "Failed to build singularity sandbox %s from %s",
                image_path[0], image_url[0])
        # FIX: previously returned Unicode(image_path) — a traitlets trait
        # constructed from a list, which raises a TraitError. Return the
        # plain path list instead.
        return image_path

    def _build_cmd(self):
        """Assemble the full `singularity exec ...` command line."""
        image_path = self.get_image_path()
        options = []
        options_in = self.default_singularity_options.split(" ")
        options.extend(options_in)
        user_home = os.path.expanduser('~' + self.user.name)
        mount_dir = os.path.join(user_home, 'jupyter_mounts')
        target_mount_dir = os.path.join("/home", self.user.name, 'mnt')
        bind_opts = []
        # With --no-home singularity won't mount the home dir itself,
        # so bind it explicitly.
        if '--no-home' in options_in:
            bind_opts.append(f"{user_home}:/home/{self.user.name}")
        if os.path.exists(mount_dir):
            # Only symlinked entries of jupyter_mounts are bind-mounted.
            bind_opts.extend([
                os.path.join(mount_dir, d) + ":" + os.path.join(target_mount_dir, d)
                for d in os.listdir(mount_dir)
                if os.path.islink(os.path.join(mount_dir, d))
            ])
        if bind_opts:
            options.append('--bind')
            options.append(','.join(bind_opts))
        cmd = []
        cmd.extend(self.singularity_cmd)
        cmd.extend(options)
        cmd.extend(image_path)
        cmd.extend([self.notebook_cmd])
        return cmd

    @property
    def cmd(self):
        return self._build_cmd()

    def get_env(self):
        env = super().get_env()
        env['KERNEL_USERNAME'] = self.user.name
        gateway = self.get_gateway_address()
        if gateway:
            env['JUPYTER_GATEWAY_URL'] = gateway.strip()
        return env

    @gen.coroutine
    def start(self):
        """
        Start the single-user server in the Singularity container specified
        by image path, pulling from docker or shub first if the pull option
        is selected.
        """
        image_path = self.get_image_path()
        pull_from_url = self.user_options.get('pull_from_url', False)
        if not os.path.exists(image_path[0]):
            pull_from_url = True
        if pull_from_url:
            image_url = self.get_image_url()
            # FIX: pull_image is a coroutine — yield it so exceptions raised
            # during the build propagate instead of being dropped in an
            # unawaited future.
            yield self.pull_image(image_url)

        (self.ip, self.port) = yield super(SingularitySpawner, self).start()
        return (self.ip, self.port)
class LDAPAuthenticator(Authenticator):
    """
    LDAP Authenticator for Jupyterhub
    """

    server_hosts = Union([List(), Unicode()], config=True,
        help="""
        List of Names, IPs, or the complete URLs in the scheme://host:port
        format of the server (required).
        """)

    server_port = Int(allow_none=True, default_value=None, config=True,
        help="""
        The port where the LDAP server is listening. Typically 389, for a
        cleartext connection, and 636 for a secured connection (defaults to None).
        """)

    server_use_ssl = Bool(default_value=False, config=True,
        help="""
        Boolean specifying if the connection is on a secure port (defaults to False).
        """)

    server_connect_timeout = Int(allow_none=True, default_value=None, config=True,
        help="""
        Timeout in seconds permitted when establishing an ldap connection before
        raising an exception (defaults to None).
        """)

    server_receive_timeout = Int(allow_none=True, default_value=None, config=True,
        help="""
        Timeout in seconds permitted for responses from established ldap
        connections before raising an exception (defaults to None).
        """)

    server_pool_strategy = Unicode(default_value='FIRST', config=True,
        help="""
        Available Pool HA strategies (defaults to 'FIRST').

        FIRST: Gets the first server in the pool, if 'server_pool_active' is
            set to True gets the first available server.
        ROUND_ROBIN: Each time the connection is open the subsequent server in
            the pool is used. If 'server_pool_active' is set to True unavailable
            servers will be discarded.
        RANDOM: each time the connection is open a random server is chosen in
            the pool. If 'server_pool_active' is set to True unavailable servers
            will be discarded.
        """)

    server_pool_active = Union([Bool(), Int()], default_value=True, config=True,
        help="""
        If True the ServerPool strategy will check for server availability. Set
        to Integer for maximum number of cycles to try before giving up
        (defaults to True).
        """)

    server_pool_exhaust = Union([Bool(), Int()], default_value=False, config=True,
        help="""
        If True, any inactive servers will be removed from the pool. If set to
        an Integer, this will be the number of seconds an unreachable server is
        considered offline. When this timeout expires the server is reinserted
        in the pool and checked again for availability (defaults to False).
        """)

    bind_user_dn = Unicode(allow_none=True, default_value=None, config=True,
        help="""
        The account of the user to log in for simple bind (defaults to None).
        """)

    bind_user_password = Unicode(allow_none=True, default_value=None, config=True,
        help="""
        The password of the user for simple bind (defaults to None)
        """)

    user_search_base = Unicode(config=True,
        help="""
        The location in the Directory Information Tree where the user search
        will start.
        """)

    user_search_filter = Unicode(config=True,
        help="""
        LDAP search filter to validate that the authenticating user exists
        within the organization. Search filters containing '{username}' will
        have that value substituted with the username of the authenticating user.
        """)

    user_membership_attribute = Unicode(default_value='memberOf', config=True,
        help="""
        LDAP Attribute used to associate user group membership
        (defaults to 'memberOf').
        """)

    group_search_base = Unicode(config=True,
        help="""
        The location in the Directory Information Tree where the group search
        will start. Search string containing '{group}' will be substituted
        with entries taken from allow_nested_groups.
        """)

    group_search_filter = Unicode(config=True,
        help="""
        LDAP search filter to return members of groups defined in the
        allowed_groups parameter. Search filters containing '{group}' will
        have that value substituted with the group dns provided in the
        allowed_groups parameter.
        """)

    allowed_groups = Union([Unicode(), List()], allow_none=True, default_value=None, config=True,
        help="""
        List of LDAP group DNs that users must be a member of in order to be
        granted login.
        """)

    allow_nested_groups = Bool(default_value=False, config=True,
        help="""
        Boolean allowing for recursive search of members within nested groups
        of allowed_groups (defaults to False).
        """)

    username_pattern = Unicode(config=True,
        help="""
        Regular expression pattern that a valid username must match. If a
        username does not match the pattern specified here, authentication
        will not be attempted. If not set, allow any username (defaults to
        None).
        """)

    username_regex = Any(
        help="""
        Compiled regex kept in sync with `username_pattern`
        """)

    @observe('username_pattern')
    def _username_pattern_changed(self, change: dict) -> None:
        """Keep `username_regex` compiled in sync with `username_pattern`."""
        if not change['new']:
            self.username_regex = None
            # FIX: previously fell through and compiled the empty pattern,
            # overwriting the None just assigned.
            return
        self.username_regex = re.compile(change['new'])

    create_user_home_dir = Bool(default_value=False, config=True,
        help="""
        If set to True, will attempt to create a user's home directory
        locally if that directory does not exist already.
        """)

    create_user_home_dir_cmd = Command(config=True,
        help="""
        Command to create a users home directory.
        The command should be formatted as a list of strings.
        """)

    @default('create_user_home_dir_cmd')
    def _default_create_user_home_dir_cmd(self) -> typing.List[str]:
        # mkhomedir_helper is linux-only; on other platforms leave the
        # command empty so add_user_home_dir fails loudly via subprocess.
        if sys.platform == 'linux':
            home_dir_cmd = ['mkhomedir_helper']
        else:
            self.log.debug(
                "Not sure how to create a home directory on '{}' system".
                format(sys.platform))
            home_dir_cmd = list()
        return home_dir_cmd

    @gen.coroutine
    def add_user(self, user: User) -> typing.Generator:
        """Hook called when a user is added; optionally creates a home dir."""
        if self.create_user_home_dir:
            username = user.name
            user_exists = yield gen.maybe_future(
                self.user_home_dir_exists(username))
            if not user_exists:
                yield gen.maybe_future(self.add_user_home_dir(username))
        yield gen.maybe_future(super().add_user(user))

    def user_home_dir_exists(self, username: str) -> bool:
        """
        Verify user home directory exists
        """
        try:
            user = pwd.getpwnam(username)
            home_dir = user.pw_dir
            return os.path.isdir(home_dir)
        except KeyError:
            # user is not known to the local passwd database
            return False

    def add_user_home_dir(self, username: str) -> None:
        """
        Creates user home directory

        Raises:
            RuntimeError: if the configured command exits non-zero.
        """
        cmd = self.create_user_home_dir_cmd + [username]
        self.log.info(
            "Creating '{}' user home directory using command '{}'".format(
                username, ' '.join(cmd)))
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = proc.communicate()
        if proc.returncode:
            raise RuntimeError(
                "Failed to create '{}' user home directory: {}".format(
                    username, err))

    def validate_username(self, username: str) -> bool:
        """
        Validate a username

        Return True if username is valid, False otherwise.
        """
        if '/' in username:
            # / is not allowed in usernames
            return False
        if not username:
            # empty usernames are not allowed
            return False
        if not self.username_regex:
            return True
        return bool(self.username_regex.match(username))

    def validate_host(self, host: str) -> bool:
        """
        Validate hostname

        Return True if host is valid, False otherwise.
        """
        ip_address_regex = re.compile(
            r'^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}'
            r'(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$')
        hostname_regex = re.compile(
            r'^(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+'
            r'[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$')
        url_regex = re.compile(r'^(ldaps?)://'
                               r'((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+'
                               r'[a-z0-9][a-z0-9-]{0,61}[a-z0-9]):'
                               r'([0-9]{1,5})$')
        if bool(ip_address_regex.match(host)):
            # using ipv4 address
            valid = True
        elif bool(hostname_regex.match(host)):
            # using a hostname address
            valid = True
        elif bool(url_regex.match(host)):
            # using host url address
            match = url_regex.match(host)
            proto = match.group(1)
            if proto == 'ldaps':
                # NOTE: flips SSL on globally for all subsequent hosts.
                self.server_use_ssl = True
            valid = True
        else:
            # unsupported host format
            valid = False
        return valid

    def create_ldap_server_pool_obj(self,
                                    ldap_servers: typing.List[str] = None
                                    ) -> ldap3.ServerPool:
        """
        Create ldap3 ServerPool Object
        """
        server_pool = ldap3.ServerPool(
            ldap_servers,
            pool_strategy=self.server_pool_strategy.upper(),
            active=self.server_pool_active,
            exhaust=self.server_pool_exhaust)
        return server_pool

    def create_ldap_server_obj(self, host: str) -> ldap3.Server:
        """
        Create ldap3 Server Object
        """
        server = ldap3.Server(host,
                              port=self.server_port,
                              use_ssl=self.server_use_ssl,
                              connect_timeout=self.server_connect_timeout)
        return server

    def ldap_connection(self, server_pool: ldap3.ServerPool, username: str,
                        password: str) -> ldap3.Connection:
        """
        Create ldap(s) Connection Object

        Returns None when the bind fails.
        """
        # attempt connection
        try:
            conn = ldap3.Connection(
                server_pool,
                user=username,
                password=password,
                auto_bind=ldap3.AUTO_BIND_NO_TLS,
                read_only=True,
                receive_timeout=self.server_receive_timeout)
        except ldap3.core.exceptions.LDAPBindError as exc:
            msg = "\n{exc_type}: {exc_msg}".format(
                exc_type=exc.__class__.__name__,
                exc_msg=exc.args[0] if exc.args else '')
            self.log.error("Failed to connect to ldap: {}".format(msg))
            conn = None
        return conn

    def get_nested_groups(self, conn, group: str) -> typing.List[str]:
        """
        Recursively search group for nested memberships

        NOTE(review): assumes the directory has no cyclic group membership —
        a cycle would recurse indefinitely. Confirm against the deployment.
        """
        nested_groups = list()
        conn.search(search_base=self.group_search_base,
                    search_filter=self.group_search_filter.format(group=group),
                    search_scope=ldap3.SUBTREE)
        if conn.response:
            for nested_group in conn.response:
                if 'dn' in nested_group:
                    nested_groups.extend([nested_group['dn']])
                    groups = self.get_nested_groups(conn, nested_group['dn'])
                    nested_groups.extend(groups)
        nested_groups = list(set(nested_groups))
        return nested_groups

    def test_auth(self, conn: ldap3.Connection, auth_user_dn: str,
                  password: str) -> bool:
        """
        Test User Authentication
        rebind ldap connection with authenticating user,
        gather results, and close connection
        """
        try:
            auth_bound = conn.rebind(user=auth_user_dn, password=password)
        except ldap3.core.exceptions.LDAPBindError:
            auth_bound = False
        finally:
            conn.unbind()
        return auth_bound

    @gen.coroutine
    def authenticate(self, handler: web.RequestHandler,
                     data: dict) -> typing.Optional[str]:
        """Authenticate `data['username']`/`data['password']` against LDAP.

        Returns the (lowercased) username on success, None otherwise.
        """
        # define vars
        username = data['username']
        password = data['password']
        server_pool = self.create_ldap_server_pool_obj()
        conn_servers = list()

        # validate credentials
        username = username.lower()
        if not self.validate_username(username):
            self.log.error('Unsupported username supplied')
            return None
        if not password or not password.strip():
            self.log.error('Empty password supplied')
            return None

        # cast server_hosts to list
        if isinstance(self.server_hosts, str):
            self.server_hosts = self.server_hosts.split(',')

        # validate hosts and populate server_pool object
        for host in self.server_hosts:
            host = host.strip().lower()
            if not self.validate_host(host):
                self.log.warning(
                    ("Host '{}' not supplied in approved format. " +
                     "Removing host from Server Pool").format(host))
                # FIX: previously `break`, which aborted the whole loop and
                # silently dropped every remaining (possibly valid) host;
                # the log message says only this host is removed.
                continue
            server = self.create_ldap_server_obj(host)
            server_pool.add(server)
            conn_servers.extend([host])

        # verify ldap connection object parameters are defined
        if not server_pool.servers:
            self.log.error(
                "No hosts provided. ldap connection requires at least 1 host to connect to."
            )
            return None
        if self.bind_user_dn is None or not self.bind_user_dn.strip():
            self.log.error(
                "'bind_user_dn' config value undefined. required for ldap connection"
            )
            return None
        if self.bind_user_password is None or not self.bind_user_password.strip(
        ):
            self.log.error(
                "'bind_user_password' config value undefined. required for ldap connection"
            )
            return None

        # verify ldap search object parameters are defined
        if not self.user_search_base or not self.user_search_base.strip():
            self.log.error(
                "'user_search_base' config value undefined. required for ldap search"
            )
            return None
        if not self.user_search_filter or not self.user_search_filter.strip():
            self.log.error(
                "'user_search_filter' config value undefined. required for ldap search"
            )
            return None

        # open ldap connection and authenticate
        self.log.debug(
            "Attempting ldap connection to {} with user '{}'".format(
                conn_servers, self.bind_user_dn))
        conn = self.ldap_connection(server_pool, self.bind_user_dn,
                                    self.bind_user_password)

        # proceed if connection has been established
        if not conn or not conn.bind():
            self.log.error(
                ("Could not establish ldap connection to {} using '{}' " +
                 "and supplied bind_user_password.").format(
                     conn_servers, self.bind_user_dn))
            return None
        else:
            self.log.debug(
                "Successfully established connection to {} with user '{}'".
                format(conn_servers, self.bind_user_dn))

            # format user search filter
            auth_user_search_filter = self.user_search_filter.format(
                username=username)

            # search for authenticating user in ldap
            self.log.debug(
                "Attempting LDAP search using search_filter '{}'.".format(
                    auth_user_search_filter))
            # paged_size=2 is enough to detect the ambiguous >1 result case
            # without pulling a full page of entries.
            conn.search(search_base=self.user_search_base,
                        search_filter=auth_user_search_filter,
                        search_scope=ldap3.SUBTREE,
                        attributes=self.user_membership_attribute
                        if self.allowed_groups else list(),
                        paged_size=2)

            # handle abnormal search results
            if not conn.response or len(conn.response) > 1:
                self.log.error(("LDAP search '{}' returned {} results. " +
                                "Please narrow search to 1 result").format(
                                    auth_user_search_filter,
                                    len(conn.response)))
                return None
            elif self.allowed_groups and 'attributes' not in conn.response[
                    0].keys():
                self.log.error(
                    ("LDAP search '{}' did not return results for requested " +
                     "search attribute(s) '{}'").format(
                         auth_user_search_filter,
                         self.user_membership_attribute))
                return None
            else:
                self.log.debug("LDAP search '{}' found {} result(s).".format(
                    auth_user_search_filter, len(conn.response)))

                # copy response to var
                search_response = conn.response[0]

                # get authenticating user's ldap attributes
                if 'dn' not in search_response or not search_response[
                        'dn'].strip():
                    self.log.error((
                        "Search results for user '{}' returned 'dn' attribute with "
                        + "undefined or null value.").format(username))
                    conn.unbind()
                    return None
                else:
                    self.log.debug(
                        "Search results for user '{}' returned 'dn' attribute as '{}'"
                        .format(username, search_response['dn']))
                    auth_user_dn = search_response['dn']

                # is authenticating user allowed
                if self.allowed_groups:
                    # compile list of user groups
                    if not search_response['attributes'][
                            self.user_membership_attribute]:
                        self.log.error((
                            "Search results for user '{}' returned '{}' attribute "
                            + "with undefined or null value.").format(
                                username, self.user_membership_attribute))
                        conn.unbind()
                        return None
                    else:
                        self.log.debug(
                            "Search results for user '{}' returned '{}' attribute as {}"
                            .format(
                                username, self.user_membership_attribute,
                                search_response['attributes'][
                                    self.user_membership_attribute]))
                        user_groups = search_response['attributes'][
                            self.user_membership_attribute]

                    # compile list of permitted groups
                    permitted_groups = list()
                    permitted_groups.extend(self.allowed_groups)
                    if self.allow_nested_groups:
                        for group in self.allowed_groups:
                            nested_groups = self.get_nested_groups(conn, group)
                            permitted_groups.extend(nested_groups)

                    # is authenticating user a member of permitted_groups
                    allowed_memberships = list(
                        set(user_groups).intersection(permitted_groups))
                    if allowed_memberships:
                        self.log.debug((
                            "User '{}' found in the following allowed ldap groups "
                            + "{}. Proceeding with authentication.").format(
                                username, allowed_memberships))
                    else:
                        self.log.error(
                            "User '{}' is not a member of any permitted groups {}"
                            .format(username, permitted_groups))
                        return None
                else:
                    self.log.debug((
                        "User '{}' will not be verified against allowed_groups due "
                        + "to feature short-circuiting. Proceeding with " +
                        "authentication.").format(username))

                # return auth results
                auth_bound = self.test_auth(conn, auth_user_dn,
                                            password) or False
                if auth_bound:
                    self.log.info(
                        "User '{}' successfully authenticated against ldap server {}."
                        .format(username, conn_servers))
                    auth_response = username
                else:
                    # FIX: format arguments were previously
                    # (conn_servers, auth_user_dn) — swapped and wrong for a
                    # "User '{}' ... ldap server {}" message.
                    self.log.error(
                        "User '{}' authentication failed against ldap server {}."
                        .format(username, conn_servers))
                    auth_response = None

                return auth_response
class KubeSpawner(Spawner):
    """
    Implement a JupyterHub spawner to spawn pods in a Kubernetes Cluster.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # By now, all the traitlets have been set, so we can use them to compute
        # other attributes
        # One shared threadpool for blocking k8s API calls; SingletonExecutor
        # presumably returns a process-wide instance — TODO confirm.
        self.executor = SingletonExecutor.instance(
            max_workers=self.k8s_api_threadpool_workers)

        # This will start watching in __init__, so it'll start the first
        # time any spawner object is created. Not ideal but works!
        self.pod_reflector = PodReflector.instance(
            parent=self, namespace=self.namespace)

        self.api = client.CoreV1Api()

        # Concrete pod/PVC names for this user, expanded from the templates.
        self.pod_name = self._expand_user_properties(self.pod_name_template)
        self.pvc_name = self._expand_user_properties(self.pvc_name_template)
        if self.hub_connect_ip:
            # Rewrite only the host:port of the hub API URL so pods can reach
            # the hub through the configured service address.
            scheme, netloc, path, params, query, fragment = urlparse(
                self.hub.api_url)
            netloc = '{ip}:{port}'.format(
                ip=self.hub_connect_ip,
                port=self.hub_connect_port,
            )
            self.accessible_hub_api_url = urlunparse(
                (scheme, netloc, path, params, query, fragment))
        else:
            self.accessible_hub_api_url = self.hub.api_url

        if self.port == 0:
            # Our default port is 8888
            self.port = 8888

    k8s_api_threadpool_workers = Integer(
        # Set this explicitly, since this is the default in Python 3.5+
        # but not in 3.4
        5 * multiprocessing.cpu_count(),
        config=True,
        help="""
        Number of threads in thread pool used to talk to the k8s API.

        Increase this if you are dealing with a very large number of users.

        Defaults to '5 * cpu_cores', which is the default for ThreadPoolExecutor.
        """)

    namespace = Unicode(config=True,
                        help="""
        Kubernetes namespace to spawn user pods in.

        If running inside a kubernetes cluster with service accounts enabled,
        defaults to the current namespace. If not, defaults to 'default'
        """)

    def _namespace_default(self):
        """
        Set namespace default to current namespace if running in a k8s cluster

        If not in a k8s cluster with service accounts enabled, default to
        'default'
        """
        # Standard in-cluster path exposing the pod's own namespace.
        ns_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace'
        if os.path.exists(ns_path):
            with open(ns_path) as f:
                return f.read().strip()
        return 'default'

    ip = Unicode('0.0.0.0',
                 help="""
        The IP address (or hostname) the single-user server should listen on.

        We override this from the parent so we can set a more sane default for
        the Kubernetes setup.
        """).tag(config=True)

    cmd = Command(None,
                  allow_none=True,
                  minlen=0,
                  help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script
        command. Extra arguments, other than this path, should be provided via `args`.

        This is usually set if you want to start the single-user server in a
        different python environment (with virtualenv/conda) than JupyterHub
        itself.

        Some spawners allow shell-style expansion here, allowing you to use
        environment variables. Most, including the default, do not. Consult
        the documentation for your spawner to verify!

        If set to None, Kubernetes will start the CMD that is specified in the
        Docker image being started.
        """).tag(config=True)

    singleuser_working_dir = Unicode(None,
                                     allow_none=True,
                                     help="""
        The working directory were the Notebook server will be started inside the container.
        Defaults to `None` so the working directory will be the one defined in the Dockerfile.
        """).tag(config=True)

    pod_name_template = Unicode('jupyter-{username}',
                                config=True,
                                help="""
        Template to use to form the name of user's pods.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pods are being spawned
        in, so if you are running multiple jupyterhubs spawning in the same
        namespace, consider setting this to be something more unique.
        """)

    user_storage_pvc_ensure = Bool(False,
                                   config=True,
                                   help="""
        Ensure that a PVC exists for each user before spawning.

        Set to true to create a PVC named with `pvc_name_template` if it does
        not exist for the user when their pod is spawning.
        """)

    pvc_name_template = Unicode('claim-{username}',
                                config=True,
                                help="""
        Template to use to form the name of user's pvc.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively.

        This must be unique within the namespace the pvc are being spawned
        in, so if you are running multiple jupyterhubs spawning in the same
        namespace, consider setting this to be something more unique.
        """)

    hub_connect_ip = Unicode(None,
                             config=True,
                             allow_none=True,
                             help="""
        IP/DNS hostname to be used by pods to reach out to the hub API.

        Defaults to `None`, in which case the `hub_ip` config is used.

        In kubernetes contexts, this is often not the same as `hub_ip`, since
        the hub runs in a pod which is fronted by a service. This IP should be
        something that pods can access to reach the hub process. This can also be
        through the proxy - API access is authenticated with a token that is passed
        only to the hub, so security is fine. Usually set to the service IP / DNS
        name of the service that fronts the hub pod
        (deployment/replicationcontroller/replicaset)

        Used together with `hub_connect_port` configuration.
        """)

    hub_connect_port = Integer(config=True,
                               help="""
        Port to use by pods to reach out to the hub API.

        Defaults to be the same as `hub_port`.

        In kubernetes contexts, this is often not the same as `hub_port`, since
        the hub runs in a pod which is fronted by a service. This allows easy
        port mapping, and some systems take advantage of it.

        This should be set to the `port` attribute of a service that is fronting
        the hub pod.
        """)

    def _hub_connect_port_default(self):
        """
        Set default port on which pods connect to hub to be the hub port

        The hub needs to be accessible to the pods at this port. We default
        to the port the hub is listening on. This would be overriden in case
        some amount of port mapping is happening.
        """
        return self.hub.server.port

    singleuser_extra_labels = Dict({},
                                   config=True,
                                   help="""
        Extra kubernetes labels to set on the spawned single-user pods.

        The keys and values specified here would be set as labels on the spawned
        single-user kubernetes pods. The keys and values must both be strings that
        match the kubernetes label key / value constraints.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        for more info on what labels are and why you might want to use them!

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    singleuser_image_spec = Unicode('jupyterhub/singleuser:latest',
                                    config=True,
                                    help="""
        Docker image spec to use for spawning user's containers.

        Defaults to `jupyterhub/singleuser:latest`

        Name of the container + a tag, same as would be used with
        a `docker pull` command. If tag is set to `latest`, kubernetes will
        check the registry each time a new user is spawned to see if there
        is a newer image available. If available, new image will be pulled.
        Note that this could cause long delays when spawning, especially
        if the image is large. If you do not specify a tag, whatever version
        of the image is first pulled on the node will be used, thus possibly
        leading to inconsistent images on different nodes. For all these
        reasons, it is recommended to specify a specific immutable tag
        for the imagespec.

        If your image is very large, you might need to increase the timeout
        for starting the single user container from the default. You can
        set this with:

        ```
        c.KubeSpawner.start_timeout = 60 * 5  # Upto 5 minutes
        ```
        """)

    singleuser_image_pull_policy = Unicode('IfNotPresent',
                                           config=True,
                                           help="""
        The image pull policy of the docker container specified in
        singleuser_image_spec.

        Defaults to `IfNotPresent` which causes the Kubelet to NOT pull the image
        specified in singleuser_image_spec if it already exists, except if the tag
        is :latest. For more information on image pull policy, refer to
        http://kubernetes.io/docs/user-guide/images/

        This configuration is primarily used in development if you are
        actively changing the singleuser_image_spec and would like to pull the image
        whenever a user container is spawned.
        """)

    singleuser_image_pull_secrets = Unicode(None,
                                            allow_none=True,
                                            config=True,
                                            help="""
        The kubernetes secret to use for pulling images from private repository.

        Set this to the name of a Kubernetes secret containing the docker configuration
        required to pull the image specified in singleuser_image_spec.

        https://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod
        has more information on when and why this might need to be set, and what
        it should be set to.
        """)

    singleuser_node_selector = Dict({},
                                    config=True,
                                    help="""
        The dictionary Selector labels used to match the Nodes where Pods will be launched.

        Default is None and means it will be launched in any available Node.

        For example to match the Nodes that have a label of `disktype: ssd` use:
        `{"disktype": "ssd"}`
        """)

    singleuser_uid = Union([Integer(), Callable()],
                           allow_none=True,
                           config=True,
                           help="""
        The UID to run the single-user server containers as.

        This UID should ideally map to a user that already exists in the container
        image being used. Running as root is discouraged.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable
        will be called asynchronously if it returns a future. Note that
        the interface of the spawner class is not deemed stable across versions,
        so using this functionality might cause your JupyterHub or kubespawner
        upgrades to break.

        If set to `None`, the user specified with the `USER` directive in the
        container metadata is used.
        """)

    singleuser_fs_gid = Union([Integer(), Callable()],
                              allow_none=True,
                              config=True,
                              help="""
        The GID of the group that should own any volumes that are created & mounted.

        A special supplemental group that applies primarily to the volumes mounted
        in the single-user server. In volumes from supported providers, the following
        things happen:

          1. The owning GID will be the this GID
          2. The setgid bit is set (new files created in the volume will be owned by
             this GID)
          3. The permission bits are OR’d with rw-rw

        The single-user server will also be run with this gid as part of its
        supplemental groups.

        Instead of an integer, this could also be a callable that takes as one
        parameter the current spawner instance and returns an integer. The callable will
        be called asynchronously if it returns a future, rather than an int. Note that
        the interface of the spawner class is not deemed stable across versions,
        so using this functionality might cause your JupyterHub or kubespawner
        upgrades to break.

        You'll *have* to set this if you are using auto-provisioned volumes with most
        cloud providers. See
        [fsGroup](http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_podsecuritycontext)
        for more details.
        """)

    volumes = List([],
                   config=True,
                   help="""
        List of Kubernetes Volume specifications that will be mounted in the user pod.

        This list will be directly added under `volumes` in the kubernetes pod spec,
        so you should use the same structure. Each item in the list must have the
        following two keys:
          - name
            Name that'll be later used in the `volume_mounts` config to mount this
            volume at a specific path.
          - <name-of-a-supported-volume-type> (such as `hostPath`, `persistentVolumeClaim`,
            etc)
            The key name determines the type of volume to mount, and the value should
            be an object specifying the various options available for that kind of
            volume.

        See http://kubernetes.io/docs/user-guide/volumes/ for more information on the
        various kinds of volumes available and their options. Your kubernetes cluster
        must already be configured to support the volume types you want to use.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    volume_mounts = List([],
                         config=True,
                         help="""
        List of paths on which to mount volumes in the user notebook's pod.

        This list will be added to the values of the `volumeMounts` key under the user's
        container in the kubernetes pod spec, so you should use the same structure as that.
        Each item in the list should be a dictionary with at least these two keys:
           - mountPath
             The path on the container in which we want to mount the volume.
           - name
             The name of the volume we want to mount, as specified in the `volumes`
             config.

        See http://kubernetes.io/docs/user-guide/volumes/ for more information on how
        the volumeMount item works.

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    user_storage_capacity = Unicode(None,
                                    config=True,
                                    allow_none=True,
                                    help="""
        The ammount of storage space to request from the volume that the pvc will
        mount to. This ammount will be the ammount of storage space the user has
        to work with on their notebook. If left blank, the kubespawner will not
        create a pvc for the pod.

        This will be added to the `resources: requests: storage:` in the k8s pod spec.

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims
        for more information on how storage works.

        Quantities can be represented externally as unadorned integers, or as fixed-point
        integers with one of these SI suffices (E, P, T, G, M, K, m) or their power-of-two
        equivalents (Ei, Pi, Ti, Gi, Mi, Ki). For example, the following represent roughly
        'the same value: 128974848, "129e6", "129M" , "123Mi".
        (https://github.com/kubernetes/kubernetes/blob/master/docs/design/resources.md)
        """)

    user_storage_extra_labels = Dict({},
                                     config=True,
                                     help="""
        Extra kubernetes labels to set on the user PVCs.

        The keys and values specified here would be set as labels on the PVCs
        created by kubespawner for the user. Note that these are only set
        when the PVC is created, not later when they are updated.

        See https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        for more info on what labels are and why you might want to use them!

        {username} and {userid} are expanded to the escaped, dns-label safe
        username & integer user id respectively, wherever they are used.
        """)

    user_storage_class = Unicode(None,
                                 config=True,
                                 allow_none=True,
                                 help="""
        The storage class that the pvc will use. If left blank, the kubespawner will not
        create a pvc for the pod.

        This will be added to the `annotations: volume.beta.kubernetes.io/storage-class:`
        in the pvc metadata.

        This will determine what type of volume the pvc will request to use. If one exists
        that matches the criteria of the StorageClass, the pvc will mount to that. Otherwise,
        b/c it has a storage class, k8s will dynamicallly spawn a pv for the pvc to bind to
        and a machine in the cluster for the pv to bind to.

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses for
        more information on how StorageClasses work.
        """)

    user_storage_access_modes = List(["ReadWriteOnce"],
                                     config=True,
                                     help="""
        List of access modes the user has for the pvc.

        The access modes are:
            The access modes are:
                ReadWriteOnce – the volume can be mounted as read-write by a single node
                ReadOnlyMany – the volume can be mounted read-only by many nodes
                ReadWriteMany – the volume can be mounted as read-write by many nodes

        See http://kubernetes.io/docs/user-guide/persistent-volumes/#access-modes for
        more information on how access modes work.
        """)

    singleuser_lifecycle_hooks = Dict({},
                                      config=True,
                                      help="""
        Kubernetes lifecycle hooks to set on the spawned single-user pods.

        The keys is name of hooks and there are only two hooks, postStart and preStop.
        The values are handler of hook which executes by Kubernetes management system
        when hook is called.

        Below are a sample copied from
        https://kubernetes.io/docs/tasks/configure-pod-container/attach-handler-lifecycle-event/
        Kubernetes doc

            lifecycle:
              postStart:
                exec:
                  command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"]
              preStop:
                exec:
                  command: ["/usr/sbin/nginx","-s","quit"]

        See https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/
        for more info on what lifecycle hooks are and why you might want to use them!
        """)

    singleuser_init_containers = List(None,
                                      config=True,
                                      help="""
        List of initialization containers belonging to the pod.

        This list will be directly added under `initContainers` in the kubernetes pod spec,
        so you should use the same structure. Each item in the list is container configuration
        which follows spec at https://kubernetes.io/docs/api-reference/v1.6/#container-v1-core.

        One usage is disabling access to metadata service from single-user notebook server with
        configuration below:
            initContainers::
                - name: init-iptables
                  image: <image with iptables installed>
                  command: ["iptables", "-A", "OUTPUT", "-p", "tcp", "--dport", "80", "-d", "169.254.169.254", "-j", "DROP"]
                  securityContext:
                    capabilities:
                      add:
                        - NET_ADMIN

        See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ for more info
        on what init containers are and why you might want to use them!

        To user this feature, Kubernetes version must greater than 1.6.
        """)

    def _expand_user_properties(self, template):
        """
        Expand {username}, {userid} and {legacy_escape_username} in `template`.

        `username` is the escapism-escaped, dns-label safe form of the user's
        name; `legacy_escape_username` is the older dash-substitution form kept
        for backwards compatibility — TODO confirm callers still rely on it.
        """
        # Make sure username matches the restrictions for DNS labels
        safe_chars = set(string.ascii_lowercase + string.digits)
        legacy_escaped_username = ''.join(
            [s if s in safe_chars else '-' for s in self.user.name.lower()])
        safe_username = escapism.escape(self.user.name,
                                        safe=safe_chars,
                                        escape_char='-').lower()
        return template.format(userid=self.user.id,
                               username=safe_username,
                               legacy_escape_username=legacy_escaped_username)

    def _expand_all(self, src):
        """
        Recursively expand user properties in all strings of `src`.

        Lists and dicts are walked; strings are expanded via
        `_expand_user_properties`; anything else is returned unchanged.
        """
        if isinstance(src, list):
            return [self._expand_all(i) for i in src]
        elif isinstance(src, dict):
            return {k: self._expand_all(v) for k, v in src.items()}
        elif isinstance(src, str):
            return self._expand_user_properties(src)
        else:
            return src

    @gen.coroutine
    def get_pod_manifest(self):
        """
        Make a pod manifest that will spawn current user's notebook pod.
        """
        # uid / fs_gid may be static values or (possibly async) callables.
        if callable(self.singleuser_uid):
            singleuser_uid = yield gen.maybe_future(self.singleuser_uid(self))
        else:
            singleuser_uid = self.singleuser_uid

        if callable(self.singleuser_fs_gid):
            singleuser_fs_gid = yield gen.maybe_future(
                self.singleuser_fs_gid(self))
        else:
            singleuser_fs_gid = self.singleuser_fs_gid

        # A None cmd defers to the image's own CMD (see `cmd` help above).
        if self.cmd:
            real_cmd = self.cmd + self.get_args()
        else:
            real_cmd = None

        # Add a hack to ensure that no service accounts are mounted in spawned pods
        # This makes sure that we don"t accidentally give access to the whole
        # kubernetes API to the users in the spawned pods.
        # See https://github.com/kubernetes/kubernetes/issues/16779#issuecomment-157460294
        hack_volume = V1Volume()
        hack_volume.name = "no-api-access-please"
        hack_volume.empty_dir = {}

        hack_volume_mount = V1VolumeMount()
        hack_volume_mount.name = "no-api-access-please"
        hack_volume_mount.mount_path = "/var/run/secrets/kubernetes.io/serviceaccount"
        hack_volume_mount.read_only = True

        # Default set of labels, picked up from
        # https://github.com/kubernetes/helm/blob/master/docs/chart_best_practices/labels.md
        labels = {
            'heritage': 'jupyterhub',
            'component': 'singleuser-server',
            'app': 'jupyterhub',
            'hub.jupyter.org/username': escapism.escape(self.user.name)
        }
        labels.update(self._expand_all(self.singleuser_extra_labels))

        return make_pod(
            name=self.pod_name,
            image_spec=self.singleuser_image_spec,
            image_pull_policy=self.singleuser_image_pull_policy,
            image_pull_secret=self.singleuser_image_pull_secrets,
            port=self.port,
            cmd=real_cmd,
            node_selector=self.singleuser_node_selector,
            run_as_uid=singleuser_uid,
            fs_gid=singleuser_fs_gid,
            env=self.get_env(),
            volumes=self._expand_all(self.volumes) + [hack_volume],
            volume_mounts=self._expand_all(self.volume_mounts) +
            [hack_volume_mount],
            working_dir=self.singleuser_working_dir,
            labels=labels,
            cpu_limit=self.cpu_limit,
            cpu_guarantee=self.cpu_guarantee,
            mem_limit=self.mem_limit,
            mem_guarantee=self.mem_guarantee,
            lifecycle_hooks=self.singleuser_lifecycle_hooks,
            init_containers=self.singleuser_init_containers,
        )

    def get_pvc_manifest(self):
        """
        Make a pvc manifest that will spawn current user's pvc.
        """
        # Default set of labels, picked up from
        # https://github.com/kubernetes/helm/blob/master/docs/chart_best_practices/labels.md
        labels = {
            'heritage': 'jupyterhub',
            'app': 'jupyterhub',
            'hub.jupyter.org/username': escapism.escape(self.user.name)
        }
        labels.update(self._expand_all(self.user_storage_extra_labels))

        return make_pvc(name=self.pvc_name,
                        storage_class=self.user_storage_class,
                        access_modes=self.user_storage_access_modes,
                        storage=self.user_storage_capacity,
                        labels=labels)

    def is_pod_running(self, pod):
        """
        Check if the given pod is running

        pod must be a dictionary representing a Pod kubernetes API object.
        """
        # FIXME: Validate if this is really the best way
        # NOTE(review): `and` short-circuits, so container_statuses is only
        # inspected once the pod is Running with an IP assigned.
        is_running = pod.status.phase == 'Running' and \
            pod.status.pod_ip is not None and \
            pod.metadata.deletion_timestamp is None and \
            all([cs.ready for cs in pod.status.container_statuses])
        return is_running

    def get_state(self):
        """
        Save state required to reinstate this user's pod from scratch

        We save the pod_name, even though we could easily compute it,
        because JupyterHub requires you save *some* state! Otherwise
        it assumes your server is dead. This works around that.

        It's also useful for cases when the pod_template changes between
        restarts - this keeps the old pods around.
        """
        state = super().get_state()
        state['pod_name'] = self.pod_name
        return state

    def load_state(self, state):
        """
        Load state from storage required to reinstate this user's pod

        Since this runs after __init__, this will override the generated pod_name
        if there's one we have saved in state. These are the same in most cases,
        but if the pod_template has changed in between restarts, it will no longer
        be the case. This allows us to continue serving from the old pods with
        the old names.
        """
        if 'pod_name' in state:
            self.pod_name = state['pod_name']

    @gen.coroutine
    def poll(self):
        """
        Check if the pod is still running.

        Returns None if it is, and 1 if it isn't. These are the return values
        JupyterHub expects.
        """
        # Reads from the reflector's local cache rather than hitting the API.
        data = self.pod_reflector.pods.get(self.pod_name, None)
        if data is not None and self.is_pod_running(data):
            return None
        return 1

    @run_on_executor
    def asynchronize(self, method, *args, **kwargs):
        """Run a blocking `method` on the shared executor, returning a future."""
        return method(*args, **kwargs)

    @gen.coroutine
    def start(self):
        """
        Create the user's PVC (optionally) and pod, then wait until running.

        Returns (pod_ip, port) once the pod is up, per the Spawner contract.
        """
        if self.user_storage_pvc_ensure:
            pvc = self.get_pvc_manifest()
            try:
                yield self.asynchronize(
                    self.api.create_namespaced_persistent_volume_claim,
                    namespace=self.namespace,
                    body=pvc)
            except ApiException as e:
                # 409 Conflict: the PVC already exists, which is fine.
                if e.status == 409:
                    self.log.info(
                        "PVC " + self.pvc_name +
                        " already exists, so did not create new pvc.")
                else:
                    raise

        # If we run into a 409 Conflict error, it means a pod with the
        # same name already exists. We stop it, wait for it to stop, and
        # try again. We try 4 times, and if it still fails we give up.
        # FIXME: Have better / cleaner retry logic!
        retry_times = 4
        pod = yield self.get_pod_manifest()
        for i in range(retry_times):
            try:
                yield self.asynchronize(self.api.create_namespaced_pod,
                                        self.namespace, pod)
                break
            except ApiException as e:
                if e.status != 409:
                    # We only want to handle 409 conflict errors
                    self.log.exception("Failed for %s", pod.to_str())
                    raise
                self.log.info('Found existing pod %s, attempting to kill',
                              self.pod_name)
                yield self.stop(True)
                self.log.info(
                    'Killed pod %s, will try starting singleuser pod again',
                    self.pod_name)
        else:
            # for/else: all retries were exhausted without a successful break.
            raise Exception(
                'Can not create user pod %s already exists & could not be deleted'
                % self.pod_name)

        # Wait (via the reflector cache) until the pod is actually running.
        while True:
            pod = self.pod_reflector.pods.get(self.pod_name, None)
            if pod is not None and self.is_pod_running(pod):
                break
            yield gen.sleep(1)
        return (pod.status.pod_ip, self.port)

    @gen.coroutine
    def stop(self, now=False):
        """
        Delete the user's pod and wait until the reflector no longer sees it.

        If `now` is True the pod is killed with no grace period.
        """
        delete_options = client.V1DeleteOptions()

        if now:
            grace_seconds = 0
        else:
            # Give it some time, but not the default (which is 30s!)
            # FIXME: Move this into pod creation maybe?
            grace_seconds = 1

        delete_options.grace_period_seconds = grace_seconds
        yield self.asynchronize(self.api.delete_namespaced_pod,
                                name=self.pod_name,
                                namespace=self.namespace,
                                body=delete_options,
                                grace_period_seconds=grace_seconds)
        while True:
            data = self.pod_reflector.pods.get(self.pod_name, None)
            if data is None:
                break
            yield gen.sleep(1)

    def _env_keep_default(self):
        # Don't inherit any env vars from the hub process into user pods.
        return []

    def get_args(self):
        """Return singleuser args with the hub API URL rewritten to be pod-reachable."""
        args = super(KubeSpawner, self).get_args()

        # HACK: we wanna replace --hub-api-url=self.hub.api_url with
        # self.accessible_hub_api_url. This is required in situations where
        # the IP the hub is listening on (such as 0.0.0.0) is not the IP where
        # it can be reached by the pods (such as the service IP used for the hub!)
        # FIXME: Make this better?
        to_replace = '--hub-api-url="%s"' % (self.hub.api_url)
        for i in range(len(args)):
            if args[i] == to_replace:
                args[i] = '--hub-api-url="%s"' % (self.accessible_hub_api_url)
                break
        return args

    def get_env(self):
        """Return the environment for the single-user server, plus legacy JPY_* vars."""
        # HACK: This is deprecated, and should be removed soon.
        # We set these to be compatible with DockerSpawner and earlie KubeSpawner
        env = super(KubeSpawner, self).get_env()
        env.update({
            'JPY_USER': self.user.name,
            'JPY_COOKIE_NAME': self.user.server.cookie_name,
            'JPY_BASE_URL': self.user.server.base_url,
            'JPY_HUB_PREFIX': self.hub.server.base_url,
            'JPY_HUB_API_URL': self.accessible_hub_api_url
        })
        return env
class YarnSpawner(Spawner):
    """A spawner for starting singleuser instances in a YARN container."""

    start_timeout = Integer(
        300,
        help=
        "Timeout (in seconds) before giving up on starting of singleuser server.",
        config=True)

    ip = Unicode(
        "0.0.0.0",
        help=
        "The IP address (or hostname) the singleuser server should listen on.",
        config=True)

    principal = Unicode(
        None,
        help='Kerberos principal for JupyterHub user',
        allow_none=True,
        config=True,
    )

    keytab = Unicode(
        None,
        help='Path to kerberos keytab for JupyterHub user',
        allow_none=True,
        config=True,
    )

    queue = Unicode(
        'default',
        help='The YARN queue to submit applications under',
        config=True,
    )

    localize_files = Dict(
        help="""
        Extra files to distribute to the singleuser server container.

        This is a mapping from ``local-name`` to ``resource``. Resource paths
        can be local, or in HDFS (prefix with ``hdfs://...`` if so). If an
        archive (``.tar.gz`` or ``.zip``), the resource will be unarchived as
        directory ``local-name``. For finer control, resources can also be
        specified as ``skein.File`` objects, or their ``dict`` equivalents.

        This can be used to distribute conda/virtual environments by
        configuring the following:

        .. code::

            c.YarnSpawner.localize_files = {
                'environment': {
                    'source': 'hdfs:///path/to/archived/environment.tar.gz',
                    'visibility': 'public'
                }
            }
            c.YarnSpawner.prologue = 'source environment/bin/activate'

        These archives are usually created using either ``conda-pack`` or
        ``venv-pack``. For more information on distributing files, see
        https://jcrist.github.io/skein/distributing-files.html.
        """,
        config=True,
    )

    prologue = Unicode(
        '',
        help='Script to run before singleuser server starts.',
        config=True,
    )

    cmd = Command(['python -m yarnspawner.singleuser'],
                  allow_none=True,
                  help='The command used for starting the singleuser server.',
                  config=True)

    mem_limit = ByteSpecification('2 G',
                                  help="""
        Maximum number of bytes a singleuser notebook server is allowed to use.
        Allows the following suffixes:

        - K -> Kibibytes
        - M -> Mebibytes
        - G -> Gibibytes
        - T -> Tebibytes
        """,
                                  config=True)

    cpu_limit = Integer(1,
                        min=1,
                        help="""
        Maximum number of cpu-cores a singleuser notebook server is allowed to use.
        Unlike other spawners, this must be an integer amount >= 1.
        """,
                        config=True)

    epilogue = Unicode(
        '',
        help='Script to run after singleuser server ends.',
        config=True,
    )

    script_template = Unicode(
        ("{prologue}\n"
         "{singleuser_command}\n"
         "{epilogue}"),
        help="""
        Template for application script.

        Filled in by calling ``script_template.format(**variables)``. Variables
        include the following attributes of this class:

        - prologue
        - singleuser_command
        - epilogue
        """,
        config=True,
    )

    delegation_token_providers = List(
        [],
        help="""
        Collection of systems for which to collect the kerberos delegation token.

        Example:

        .. code::

            c.YarnSpawner.delegation_token_providers = [{
                'name': 'hive',
                'config': {
                    'hive.jdbc.url':'hive2://127.0.0.1:10000/myDatabase',
                    'hive.jdbc.principal':'hive/[email protected]'
                }
            }, {
                'name': 'hcat',
                'config': {
                    'hcat.metastore.uri':'thrift://127.0.0.1:9083',
                    'hcat.metastore.principal':'hive/[email protected]'
                }
            }]
        """,
        config=True,
    )

    # A cache of clients by (principal, keytab). In most cases this will only
    # be a single client. These should persist for the lifetime of jupyterhub.
    clients = {}

    async def _get_client(self):
        """Return (creating and caching if needed) a skein client for our credentials.

        Clients are cached on the class keyed by (principal, keytab) so that
        all spawners with the same credentials share one client.
        """
        key = (self.principal, self.keytab)
        client = type(self).clients.get(key)
        if client is None:
            kwargs = dict(principal=self.principal,
                          keytab=self.keytab,
                          security=skein.Security.new_credentials())
            # skein.Client() blocks while starting the driver process, so run
            # it on the loop's default executor.
            client = await gen.IOLoop.current().run_in_executor(
                None, lambda: skein.Client(**kwargs))
            type(self).clients[key] = client
        return client

    @property
    def singleuser_command(self):
        """The full command (with args) to launch a singleuser server"""
        return ' '.join(self.cmd + self.get_args())

    def _build_specification(self):
        """Build the skein ApplicationSpec for this user's singleuser server."""
        script = self.script_template.format(
            prologue=self.prologue,
            singleuser_command=self.singleuser_command,
            epilogue=self.epilogue)

        resources = skein.Resources(memory='%d b' % self.mem_limit,
                                    vcores=self.cpu_limit)

        security = skein.Security.new_credentials()

        # Support dicts as well as File objects
        files = {
            k: skein.File.from_dict(v) if isinstance(v, dict) else v
            for k, v in self.localize_files.items()
        }

        master = skein.Master(resources=resources,
                              files=files,
                              env=self.get_env(),
                              script=script,
                              security=security)

        return skein.ApplicationSpec(
            name='jupyterhub',
            queue=self.queue,
            user=self.user.name,
            master=master,
            delegation_token_providers=self.delegation_token_providers)

    def load_state(self, state):
        """Restore the YARN application id (if any) from saved state."""
        super().load_state(state)
        self.app_id = state.get('app_id', '')

    def get_state(self):
        """Persist the YARN application id so the server survives hub restarts."""
        state = super().get_state()
        if self.app_id:
            state['app_id'] = self.app_id
        return state

    def clear_state(self):
        """Forget the YARN application id when the server is cleaned up."""
        super().clear_state()
        self.app_id = ''

    async def start(self):
        """Submit the YARN application and wait until the server is reachable.

        Returns (ip, port) per the Spawner contract. Raises if the application
        reaches a stopped state before coming up.
        """
        loop = gen.IOLoop.current()

        spec = self._build_specification()

        client = await self._get_client()

        # Set app_id == 'PENDING' to signal that we're starting
        self.app_id = 'PENDING'

        try:
            self.app_id = app_id = await loop.run_in_executor(
                None, client.submit, spec)
        except Exception as exc:
            # We errored, no longer pending
            self.app_id = ''
            self.log.error(
                "Failed to submit application for user %s. Original exception:",
                self.user.name,
                exc_info=exc)
            raise

        # Wait for application to start
        while True:
            report = await loop.run_in_executor(None,
                                                client.application_report,
                                                app_id)
            state = str(report.state)
            if state in _STOPPED_STATES:
                raise Exception("Application %s failed to start, check "
                                "application logs for more information" %
                                app_id)
            elif state == 'RUNNING':
                self.current_ip = report.host
                break
            else:
                await gen.sleep(0.5)

        # Wait for port to be set (reported back by the singleuser server).
        while getattr(self, 'current_port', 0) == 0:
            await gen.sleep(0.5)
            # The application may die between polls; re-check its state.
            report = await loop.run_in_executor(None,
                                                client.application_report,
                                                app_id)
            if str(report.state) in _STOPPED_STATES:
                raise Exception("Application %s failed to start, check "
                                "application logs for more information" %
                                app_id)

        return self.current_ip, self.current_port

    async def poll(self):
        """Check if the singleuser server is running.

        Returns None while running (or still pending), and an exit code
        (0 or 1) once the application has finished.
        """
        if self.app_id == '':
            return 0
        elif self.app_id == 'PENDING':
            return None

        client = await self._get_client()
        report = await gen.IOLoop.current().run_in_executor(
            None, client.application_report, self.app_id)
        status = str(report.final_status)
        if status in {'SUCCEEDED', 'KILLED'}:
            return 0
        elif status == 'FAILED':
            return 1
        else:
            return None

    async def stop(self, now=False):
        """Kill the YARN application backing this user's server.

        `now` is accepted for API compatibility; YARN kills are always
        immediate here.
        """
        if self.app_id == 'PENDING':
            # The application is in the process of being submitted. Wait for a
            # reasonable amount of time until we have an application id
            for i in range(20):
                if self.app_id != 'PENDING':
                    break
                await gen.sleep(0.1)
            else:
                # Fixed: Logger.warn is a deprecated alias of Logger.warning.
                self.log.warning("Application has been PENDING for an "
                                 "unreasonable amount of time, there's likely "
                                 "something wrong")

        # Application not submitted, or submission errored out, nothing to do.
        if self.app_id == '':
            return

        client = await self._get_client()
        await gen.IOLoop.current().run_in_executor(None,
                                                   client.kill_application,
                                                   self.app_id)
class ConfigurableHTTPProxy(Proxy):
    """Proxy implementation for the default configurable-http-proxy."""

    # Handle on the CHP subprocess (a Popen object once start() has run).
    proxy_process = Any()
    client = Instance(AsyncHTTPClient, ())

    debug = Bool(False, help="Add debug-level logging to the Proxy",
                 config=True)
    auth_token = Unicode(help="""The Proxy Auth token.

        Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
        """,
    ).tag(config=True)
    check_running_interval = Integer(5, config=True)

    @default('auth_token')
    def _auth_token_default(self):
        # Fall back to a freshly generated token; warn because a generated
        # token does not survive a Hub restart.
        token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', None)
        if not token:
            self.log.warning('\n'.join([
                "",
                "Generating CONFIGPROXY_AUTH_TOKEN. Restarting the Hub will require restarting the proxy.",
                "Set CONFIGPROXY_AUTH_TOKEN env or JupyterHub.proxy_auth_token config to avoid this message.",
                "",
            ]))
            token = utils.new_token()
        return token

    api_url = Unicode(
        'http://127.0.0.1:8001',
        config=True,
        help="""The ip (or hostname) of the proxy's API endpoint""")
    command = Command('configurable-http-proxy',
                      config=True,
                      help="""The command to start the proxy""")

    @gen.coroutine
    def start(self):
        """Start the CHP subprocess and wait until both its public and API
        endpoints answer; then install a periodic liveness check."""
        public_server = Server.from_url(self.public_url)
        api_server = Server.from_url(self.api_url)
        env = os.environ.copy()
        env['CONFIGPROXY_AUTH_TOKEN'] = self.auth_token
        cmd = self.command + [
            '--ip', public_server.ip,
            '--port', str(public_server.port),
            '--api-ip', api_server.ip,
            '--api-port', str(api_server.port),
            '--default-target', self.hub.host,
            '--error-target', url_path_join(self.hub.url, 'error'),
        ]
        if self.app.subdomain_host:
            cmd.append('--host-routing')
        if self.debug:
            cmd.extend(['--log-level', 'debug'])
        if self.ssl_key:
            cmd.extend(['--ssl-key', self.ssl_key])
        if self.ssl_cert:
            cmd.extend(['--ssl-cert', self.ssl_cert])
        if self.app.statsd_host:
            cmd.extend([
                '--statsd-host', self.app.statsd_host,
                '--statsd-port', str(self.app.statsd_port),
                '--statsd-prefix', self.app.statsd_prefix + '.chp'
            ])
        # Warn if SSL is not used
        if ' --ssl' not in ' '.join(cmd):
            self.log.warning(
                "Running JupyterHub without SSL."
                " I hope there is SSL termination happening somewhere else..."
            )
        self.log.info("Starting proxy @ %s", public_server.bind_url)
        self.log.debug("Proxy cmd: %s", cmd)
        try:
            self.proxy_process = Popen(cmd, env=env, start_new_session=True)
        except FileNotFoundError:
            self.log.error(
                "Failed to find proxy %r\n"
                "The proxy can be installed with `npm install -g configurable-http-proxy`"
                # BUG FIX: was `self.cmd`, an attribute this class does not
                # define (the trait is named `command`).
                % self.command)
            # BUG FIX: was `self.exit(1)`, which Proxy does not define;
            # re-raise so the startup failure propagates to the caller.
            raise

        def _check_process():
            # Raise if the CHP subprocess has already exited.
            status = self.proxy_process.poll()
            if status is not None:
                e = RuntimeError("Proxy failed to start with exit code %i"
                                 % status)
                # py2-compatible `raise e from None`
                e.__cause__ = None
                raise e

        # Poll both endpoints until they answer (up to ~10s each), checking
        # between attempts that the process hasn't died.
        for server in (public_server, api_server):
            for i in range(10):
                _check_process()
                try:
                    yield server.wait_up(1)
                except TimeoutError:
                    continue
                else:
                    break
            yield server.wait_up(1)
        # BUG FIX: was a blocking `time.sleep(1)` inside a coroutine, which
        # stalls the whole event loop; yield to the loop instead.
        yield gen.sleep(1)
        _check_process()
        self.log.debug("Proxy started and appears to be up")
        # Periodically re-check (and restart) the proxy process.
        pc = PeriodicCallback(self.check_running,
                              1e3 * self.check_running_interval)
        pc.start()

    def stop(self):
        """Terminate the proxy subprocess if it is still running."""
        self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid)
        if self.proxy_process.poll() is None:
            try:
                self.proxy_process.terminate()
            except Exception as e:
                self.log.error("Failed to terminate proxy process: %s", e)

    @gen.coroutine
    def check_running(self):
        """Check if the proxy is still running"""
        if self.proxy_process.poll() is None:
            return
        self.log.error(
            "Proxy stopped with exit code %r",
            'unknown' if self.proxy_process is None
            else self.proxy_process.poll())
        # Restart the proxy and re-install all known routes.
        yield self.start()
        yield self.restore_routes()

    def api_request(self, path, method='GET', body=None, client=None):
        """Make an authenticated API request of the proxy."""
        client = client or AsyncHTTPClient()
        url = url_path_join(self.api_url, 'api/routes', path)
        if isinstance(body, dict):
            body = json.dumps(body)
        self.log.debug("Proxy: Fetching %s %s", method, url)
        req = HTTPRequest(
            url,
            method=method,
            headers={'Authorization': 'token {}'.format(self.auth_token)},
            body=body,
        )
        return client.fetch(req)

    def add_route(self, routespec, target, data=None):
        """Register routespec -> target with the proxy."""
        body = data or {}
        body['target'] = target
        return self.api_request(
            routespec,
            method='POST',
            body=body,
        )

    def delete_route(self, routespec):
        """Remove routespec from the proxy."""
        return self.api_request(routespec, method='DELETE')

    def _reformat_routespec(self, routespec, chp_data):
        """Reformat CHP data format to JupyterHub's proxy API."""
        target = chp_data.pop('target')
        return {
            'routespec': routespec,
            'target': target,
            'data': chp_data,
        }

    @gen.coroutine
    def get_route(self, routespec):
        """Fetch a single route from the proxy."""
        # BUG FIX: previously used method='DELETE', which *removed* the
        # route instead of reading it, and then treated the raw
        # HTTPResponse as a dict. Use GET and decode the JSON body.
        resp = yield self.api_request(routespec, method='GET')
        chp_data = json.loads(resp.body.decode('utf8', 'replace'))
        return self._reformat_routespec(routespec, chp_data)

    @gen.coroutine
    def get_all_routes(self, client=None):
        """Fetch the proxy's routes."""
        resp = yield self.api_request('', client=client)
        chp_routes = json.loads(resp.body.decode('utf8', 'replace'))
        all_routes = {}
        for routespec, chp_data in chp_routes.items():
            all_routes[routespec] = self._reformat_routespec(
                routespec, chp_data)
        return all_routes
class BatchSpawnerBase(Spawner):
    """Base class for spawners using resource manager batch job submission mechanisms

    This base class is developed targeting the TorqueSpawner and SlurmSpawner, so by
    default assumes a qsub-like command that reads a script from its stdin for starting
    jobs, a qstat-like command that outputs some data that can be parsed to check if the
    job is running and on what remote node, and a qdel-like command to cancel a job.

    The goal is to be sufficiently general that a broad range of systems can be
    supported with minimal overrides.

    At minimum, subclasses should provide reasonable defaults for the traits:
        batch_script
        batch_submit_cmd
        batch_query_cmd
        batch_cancel_cmd

    and must provide implementations for the methods:
        state_ispending
        state_isrunning
        state_gethost
    """

    # override default since will need to set the listening port using the api
    cmd = Command(['batchspawner-singleuser'],
                  allow_none=True).tag(config=True)

    # override default since batch systems typically need longer
    start_timeout = Integer(300).tag(config=True)

    # override default server ip since batch jobs normally running remotely
    ip = Unicode(
        "0.0.0.0",
        help="Address for singleuser server to listen at").tag(config=True)

    exec_prefix = Unicode(
        'sudo -E -u {username}',
        help="Standard execution prefix (e.g. the default sudo -E -u {username})"
    ).tag(config=True)

    # all these req_foo traits will be available as substvars for templated strings
    req_queue = Unicode(
        '',
        help="Queue name to submit job to resource manager").tag(config=True)

    req_host = Unicode(
        '',
        help="Host name of batch server to submit job to resource manager"
    ).tag(config=True)

    req_memory = Unicode(
        '', help="Memory to request from resource manager").tag(config=True)

    req_nprocs = Unicode(
        '', help="Number of processors to request from resource manager").tag(
            config=True)

    req_ngpus = Unicode(
        '', help="Number of GPUs to request from resource manager").tag(
            config=True)

    req_runtime = Unicode(
        '', help="Length of time for submitted job to run").tag(config=True)

    req_partition = Unicode(
        '', help="Partition name to submit job to resource manager").tag(
            config=True)

    req_account = Unicode(
        '', help="Account name string to pass to the resource manager").tag(
            config=True)

    req_options = Unicode(
        '', help="Other options to include into job submission script").tag(
            config=True)

    req_prologue = Unicode(
        '', help="Script to run before single user server starts.").tag(
            config=True)

    req_epilogue = Unicode(
        '', help="Script to run after single user server ends.").tag(
            config=True)

    req_username = Unicode()

    @default('req_username')
    def _req_username_default(self):
        return self.user.name

    # Useful IF getpwnam on submit host returns correct info for exec host
    req_homedir = Unicode()

    @default('req_homedir')
    def _req_homedir_default(self):
        return pwd.getpwnam(self.user.name).pw_dir

    req_keepvars = Unicode()

    @default('req_keepvars')
    def _req_keepvars_default(self):
        return ','.join(self.get_env().keys())

    req_keepvars_extra = Unicode(
        help="Extra environment variables which should be configured, "
        "added to the defaults in keepvars, "
        "comma separated list.")

    batch_script = Unicode(
        '',
        help="Template for job submission script. Traits on this class named like req_xyz "
        "will be substituted in the template for {xyz} using string.Formatter. "
        "Must include {cmd} which will be replaced with the jupyterhub-singleuser command line."
    ).tag(config=True)

    # Raw output of job submission command unless overridden
    job_id = Unicode()

    # Will get the raw output of the job status command unless overridden
    job_status = Unicode()

    # Will get the address of the server as reported by job manager
    current_ip = Unicode()

    # Will get the port of the server as reported by singleserver
    current_port = Integer()

    def get_req_subvars(self):
        """Prepare substitution variables for templates using req_xyz traits.

        Returns a dict mapping 'xyz' -> value for every trait named
        'req_xyz'; 'keepvars_extra', when set, is folded into 'keepvars'.
        """
        reqlist = [t for t in self.trait_names() if t.startswith('req_')]
        subvars = {}
        for t in reqlist:
            subvars[t[4:]] = getattr(self, t)
        if subvars.get('keepvars_extra'):
            subvars['keepvars'] += ',' + subvars['keepvars_extra']
        return subvars

    batch_submit_cmd = Unicode(
        '',
        help="Command to run to submit batch scripts. Formatted using req_xyz traits as {xyz}."
    ).tag(config=True)

    def parse_job_id(self, output):
        "Parse output of submit command to get job id."
        # Default: the submit command's raw output is the job id.
        return output

    def cmd_formatted_for_batch(self):
        """Return the single-user server command line as one shell string."""
        return ' '.join(self.cmd + self.get_args())

    @gen.coroutine
    def run_command(self, cmd, input=None, env=None):
        """Run `cmd` in a shell, optionally feeding `input` to its stdin.

        Returns the stripped stdout on success, or the nonzero exit status
        when the process fails without raising. Raises RuntimeError (with
        stderr attached) when the subprocess reports CalledProcessError.
        """
        proc = Subprocess(cmd,
                          shell=True,
                          env=env,
                          stdin=Subprocess.STREAM,
                          stdout=Subprocess.STREAM,
                          stderr=Subprocess.STREAM)
        inbytes = None
        if input:
            inbytes = input.encode()
        # BUG FIX: only write when there actually is input -- writing None
        # to the stream raises for commands run without stdin data.
        if inbytes is not None:
            try:
                yield proc.stdin.write(inbytes)
            except StreamClosedError:
                # Apparently harmless: the process may exit before
                # consuming its stdin.
                pass
        proc.stdin.close()
        out, eout = yield [
            proc.stdout.read_until_close(),
            proc.stderr.read_until_close()
        ]
        proc.stdout.close()
        proc.stderr.close()
        eout = eout.decode().strip()
        try:
            err = yield proc.wait_for_exit()
        except CalledProcessError:
            self.log.error("Subprocess returned exitcode %s" % proc.returncode)
            self.log.error('Stdout:')
            self.log.error(out)
            self.log.error('Stderr:')
            self.log.error(eout)
            raise RuntimeError('{} exit status {}: {}'.format(
                cmd, proc.returncode, eout))
        if err != 0:
            return err  # exit error?
        else:
            out = out.decode().strip()
            return out

    @gen.coroutine
    def _get_batch_script(self, **subvars):
        """Format batch script from vars"""
        # Could be overridden by subclasses, but mainly useful for testing
        return format_template(self.batch_script, **subvars)

    @gen.coroutine
    def submit_batch_script(self):
        """Format and submit the batch script; sets and returns self.job_id
        ('' on submission failure)."""
        subvars = self.get_req_subvars()
        cmd = ' '.join((format_template(self.exec_prefix, **subvars),
                        format_template(self.batch_submit_cmd, **subvars)))
        # Note: {cmd} is only available to the script template, not to
        # exec_prefix/batch_submit_cmd (it is added after `cmd` is built).
        subvars['cmd'] = self.cmd_formatted_for_batch()
        if hasattr(self, 'user_options'):
            subvars.update(self.user_options)
        script = yield self._get_batch_script(**subvars)
        self.log.info('Spawner submitting job using ' + cmd)
        self.log.info('Spawner submitted script:\n' + script)
        out = yield self.run_command(cmd, input=script, env=self.get_env())
        try:
            self.log.info('Job submitted. cmd: ' + cmd + ' output: ' + out)
            self.job_id = self.parse_job_id(out)
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        except Exception:
            # BUG FIX: str(out) -- run_command may return an int exit code
            # here, and string concatenation with an int would raise inside
            # the error handler itself.
            self.log.error('Job submission failed with exit code ' + str(out))
            self.job_id = ''
        return self.job_id

    # Override if your batch system needs something more elaborate to read the job status
    batch_query_cmd = Unicode(
        '',
        help="Command to run to read job status. Formatted using req_xyz traits as {xyz} "
        "and self.job_id as {job_id}.").tag(config=True)

    @gen.coroutine
    def read_job_state(self):
        """Run batch_query_cmd and store its raw output in self.job_status."""
        if self.job_id is None or len(self.job_id) == 0:
            # job not running
            self.job_status = ''
            return self.job_status
        subvars = self.get_req_subvars()
        subvars['job_id'] = self.job_id
        cmd = ' '.join((format_template(self.exec_prefix, **subvars),
                        format_template(self.batch_query_cmd, **subvars)))
        self.log.debug('Spawner querying job: ' + cmd)
        try:
            out = yield self.run_command(cmd, env=self.get_env())
            self.job_status = out
        except Exception:
            self.log.error('Error querying job ' + self.job_id)
            self.job_status = ''
        finally:
            # return inside finally: any query failure is deliberately
            # reported as an empty status string, never raised.
            return self.job_status

    batch_cancel_cmd = Unicode(
        '',
        help="Command to stop/cancel a previously submitted job. Formatted like batch_query_cmd."
    ).tag(config=True)

    @gen.coroutine
    def cancel_batch_job(self):
        """Run batch_cancel_cmd for the current job id."""
        subvars = self.get_req_subvars()
        subvars['job_id'] = self.job_id
        cmd = ' '.join((format_template(self.exec_prefix, **subvars),
                        format_template(self.batch_cancel_cmd, **subvars)))
        self.log.info('Cancelling job ' + self.job_id + ': ' + cmd)
        yield self.run_command(cmd, env=self.get_env())

    def load_state(self, state):
        """load job_id from state"""
        super(BatchSpawnerBase, self).load_state(state)
        self.job_id = state.get('job_id', '')
        self.job_status = state.get('job_status', '')

    def get_state(self):
        """add job_id to state"""
        state = super(BatchSpawnerBase, self).get_state()
        if self.job_id:
            state['job_id'] = self.job_id
        if self.job_status:
            state['job_status'] = self.job_status
        return state

    def clear_state(self):
        """clear job_id state"""
        super(BatchSpawnerBase, self).clear_state()
        self.job_id = ""
        self.job_status = ''

    def make_preexec_fn(self, name):
        """make preexec fn to change uid (if running as root) before job submission"""
        return set_user_setuid(name)

    def state_ispending(self):
        "Return boolean indicating if job is still waiting to run, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    def state_isrunning(self):
        "Return boolean indicating if job is running, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    def state_gethost(self):
        "Return string, hostname or addr of running job, likely by parsing self.job_status"
        raise NotImplementedError("Subclass must provide implementation")

    @gen.coroutine
    def poll(self):
        """Poll the job: None if running/pending, 1 if it is gone."""
        if self.job_id is not None and len(self.job_id) > 0:
            yield self.read_job_state()
            if self.state_isrunning() or self.state_ispending():
                return None
            else:
                self.clear_state()
                return 1

        if not self.job_id:
            # no job id means it's not running
            self.clear_state()
            return 1

    startup_poll_interval = Float(
        0.5,
        help="Polling interval (seconds) to check job state during startup"
    ).tag(config=True)

    @gen.coroutine
    def start(self):
        """Start the process"""
        if jupyterhub.version_info >= (0, 8) and self.server:
            self.server.port = self.port
        # submit_batch_script sets self.job_id; its return value is unused.
        yield self.submit_batch_script()

        # We are called with a timeout, and if the timeout expires this function will
        # be interrupted at the next yield, and self.stop() will be called.
        # So this function should not return unless successful, and if unsuccessful
        # should either raise and Exception or loop forever.
        if len(self.job_id) == 0:
            raise RuntimeError(
                "Jupyter batch job submission failure (no jobid in output)")
        while True:
            yield self.poll()
            if self.state_isrunning():
                break
            else:
                if self.state_ispending():
                    self.log.debug('Job ' + self.job_id + ' still pending')
                else:
                    # BUG FIX: log.warn is a deprecated alias of log.warning
                    self.log.warning('Job ' + self.job_id +
                                     ' neither pending nor running.\n' +
                                     self.job_status)
                    raise RuntimeError(
                        'The Jupyter batch job has disappeared'
                        ' while pending in the queue or died immediately'
                        ' after starting.')
            yield gen.sleep(self.startup_poll_interval)

        self.current_ip = self.state_gethost()
        # Wait for the single-user server to report its port back.
        while self.current_port == 0:
            yield gen.sleep(self.startup_poll_interval)

        if jupyterhub.version_info < (0, 7):
            # store on user for pre-jupyterhub-0.7:
            self.user.server.port = self.current_port
            self.user.server.ip = self.current_ip
        self.db.commit()
        self.log.info("Notebook server job {0} started at {1}:{2}".format(
            self.job_id, self.current_ip, self.current_port))

        return self.current_ip, self.current_port

    @gen.coroutine
    def stop(self, now=False):
        """Stop the singleuser server job.

        Returns immediately after sending job cancellation command if
        now=True, otherwise tries to confirm that job is no longer running.
        """
        self.log.info("Stopping server job " + self.job_id)
        yield self.cancel_batch_job()
        if now:
            return
        for i in range(10):
            yield self.poll()
            if not self.state_isrunning():
                return
            yield gen.sleep(1.0)
        if self.job_id:
            # BUG FIX: log.warn deprecated, and the message reported the
            # requested port (self.port) instead of the actual server port.
            self.log.warning(
                "Notebook server job {0} at {1}:{2} possibly failed to terminate"
                .format(self.job_id, self.current_ip, self.current_port))
class ConfigurableHTTPProxy(Proxy):
    """Proxy implementation for the default configurable-http-proxy.

    This is the default proxy implementation
    for running the nodejs proxy `configurable-http-proxy`.

    If the proxy should not be run as a subprocess of the Hub,
    (e.g. in a separate container),
    set::

        c.ConfigurableHTTPProxy.should_start = False
    """

    # Handle on the CHP subprocess (a Popen object once start() has run).
    proxy_process = Any()
    client = Instance(AsyncHTTPClient, ())

    debug = Bool(False, help="Add debug-level logging to the Proxy.",
                 config=True)
    auth_token = Unicode(
        help="""The Proxy auth token

        Loaded from the CONFIGPROXY_AUTH_TOKEN env variable by default.
        """,
    ).tag(config=True)
    check_running_interval = Integer(5, config=True)

    @default('auth_token')
    def _auth_token_default(self):
        # Fall back to a freshly generated token; warn because a generated
        # token does not survive a Hub restart.
        token = os.environ.get('CONFIGPROXY_AUTH_TOKEN', None)
        if not token:
            self.log.warning('\n'.join([
                "",
                "Generating CONFIGPROXY_AUTH_TOKEN. Restarting the Hub will require restarting the proxy.",
                "Set CONFIGPROXY_AUTH_TOKEN env or JupyterHub.proxy_auth_token config to avoid this message.",
                "",
            ]))
            token = utils.new_token()
        return token

    api_url = Unicode('http://127.0.0.1:8001',
                      config=True,
                      help="""The ip (or hostname) of the proxy's API endpoint"""
                      )
    command = Command('configurable-http-proxy',
                      config=True,
                      help="""The command to start the proxy"""
                      )

    @gen.coroutine
    def start(self):
        """Start the CHP subprocess and wait until both its public and API
        endpoints answer; then install a periodic liveness check."""
        public_server = Server.from_url(self.public_url)
        api_server = Server.from_url(self.api_url)
        env = os.environ.copy()
        env['CONFIGPROXY_AUTH_TOKEN'] = self.auth_token
        cmd = self.command + [
            '--ip', public_server.ip,
            '--port', str(public_server.port),
            '--api-ip', api_server.ip,
            '--api-port', str(api_server.port),
            '--error-target', url_path_join(self.hub.url, 'error'),
        ]
        if self.app.subdomain_host:
            cmd.append('--host-routing')
        if self.debug:
            cmd.extend(['--log-level', 'debug'])
        if self.ssl_key:
            cmd.extend(['--ssl-key', self.ssl_key])
        if self.ssl_cert:
            cmd.extend(['--ssl-cert', self.ssl_cert])
        if self.app.statsd_host:
            cmd.extend([
                '--statsd-host', self.app.statsd_host,
                '--statsd-port', str(self.app.statsd_port),
                '--statsd-prefix', self.app.statsd_prefix + '.chp'
            ])
        # Warn if SSL is not used
        if ' --ssl' not in ' '.join(cmd):
            self.log.warning("Running JupyterHub without SSL."
                             " I hope there is SSL termination happening somewhere else...")
        self.log.info("Starting proxy @ %s", public_server.bind_url)
        self.log.debug("Proxy cmd: %s", cmd)
        # On Windows a shell is needed to resolve the npm launcher script.
        shell = os.name == 'nt'
        try:
            self.proxy_process = Popen(cmd, env=env, start_new_session=True,
                                       shell=shell)
        except FileNotFoundError as e:
            self.log.error(
                "Failed to find proxy %r\n"
                "The proxy can be installed with `npm install -g configurable-http-proxy`"
                % self.command
            )
            raise

        def _check_process():
            # Raise if the CHP subprocess has already exited.
            status = self.proxy_process.poll()
            if status is not None:
                e = RuntimeError(
                    "Proxy failed to start with exit code %i" % status)
                # py2-compatible `raise e from None`
                e.__cause__ = None
                raise e

        # Poll both endpoints until they answer (up to ~10s each), checking
        # between attempts that the process hasn't died.
        for server in (public_server, api_server):
            for i in range(10):
                _check_process()
                try:
                    yield server.wait_up(1)
                except TimeoutError:
                    continue
                else:
                    break
            yield server.wait_up(1)
        _check_process()
        self.log.debug("Proxy started and appears to be up")
        # Periodically re-check (and restart) the proxy process.
        pc = PeriodicCallback(self.check_running,
                              1e3 * self.check_running_interval)
        pc.start()

    def stop(self):
        """Terminate the proxy subprocess if it is still running."""
        self.log.info("Cleaning up proxy[%i]...", self.proxy_process.pid)
        if self.proxy_process.poll() is None:
            try:
                self.proxy_process.terminate()
            except Exception as e:
                self.log.error("Failed to terminate proxy process: %s", e)

    @gen.coroutine
    def check_running(self):
        """Check if the proxy is still running"""
        if self.proxy_process.poll() is None:
            return
        self.log.error("Proxy stopped with exit code %r",
                       'unknown' if self.proxy_process is None
                       else self.proxy_process.poll()
                       )
        # Restart the proxy and re-install all known routes.
        yield self.start()
        yield self.restore_routes()

    def _routespec_to_chp_path(self, routespec):
        """Turn a routespec into a CHP API path

        For host-based routing, CHP uses the host as the first path segment.
        """
        path = self.validate_routespec(routespec)
        # CHP always wants to start with /
        if not path.startswith('/'):
            path = '/' + path
        # BUG: CHP doesn't seem to like trailing slashes on some endpoints (DELETE)
        if path != '/' and path.endswith('/'):
            path = path.rstrip('/')
        return path

    def _routespec_from_chp_path(self, chp_path):
        """Turn a CHP route into a route spec

        In the JSON API, CHP route keys are unescaped,
        so re-escape them to raw URLs and ensure slashes are in the right
        places.
        """
        # chp stores routes in unescaped form.
        # restore escaped-form we created it with.
        routespec = quote(chp_path, safe='@/')
        if self.host_routing:
            # host routes don't start with /
            routespec = routespec.lstrip('/')
        # all routes should end with /
        if not routespec.endswith('/'):
            routespec = routespec + '/'
        return routespec

    def api_request(self, path, method='GET', body=None, client=None):
        """Make an authenticated API request of the proxy."""
        client = client or AsyncHTTPClient()
        url = url_path_join(self.api_url, 'api/routes', path)

        if isinstance(body, dict):
            body = json.dumps(body)
        self.log.debug("Proxy: Fetching %s %s", method, url)
        req = HTTPRequest(url,
                          method=method,
                          headers={'Authorization': 'token {}'.format(
                              self.auth_token)},
                          body=body,
                          )

        return client.fetch(req)

    def add_route(self, routespec, target, data):
        """Register routespec -> target with the proxy, tagged as ours."""
        body = data or {}
        body['target'] = target
        # Mark the route as JupyterHub-owned so get_all_routes can filter.
        body['jupyterhub'] = True
        path = self._routespec_to_chp_path(routespec)
        return self.api_request(path,
                                method='POST',
                                body=body,
                                )

    def delete_route(self, routespec):
        """Remove routespec from the proxy."""
        path = self._routespec_to_chp_path(routespec)
        return self.api_request(path, method='DELETE')

    def _reformat_routespec(self, routespec, chp_data):
        """Reformat CHP data format to JupyterHub's proxy API."""
        target = chp_data.pop('target')
        # Drop the ownership marker added by add_route; callers only see
        # their own data. Assumes the marker is present (callers filter).
        chp_data.pop('jupyterhub')
        return {
            'routespec': routespec,
            'target': target,
            'data': chp_data,
        }

    @gen.coroutine
    def get_all_routes(self, client=None):
        """Fetch the proxy's routes."""
        resp = yield self.api_request('', client=client)
        chp_routes = json.loads(resp.body.decode('utf8', 'replace'))
        all_routes = {}
        for chp_path, chp_data in chp_routes.items():
            routespec = self._routespec_from_chp_path(chp_path)
            if 'jupyterhub' not in chp_data:
                # exclude routes not associated with JupyterHub
                self.log.debug("Omitting non-jupyterhub route %r", routespec)
                continue
            all_routes[routespec] = self._reformat_routespec(
                routespec, chp_data)
        return all_routes
class SingularitySpawner(LocalProcessSpawner):
    """SingularitySpawner - extends the default LocalProcessSpawner to allow for:

    1) User-specification of a singularity image via the Spawner options form
    2) Spawning a Notebook server within a Singularity container
    """

    singularity_cmd = Command(
        ['/usr/local/bin/singularity', 'exec'],
        help="""
        This is the singularity command that will be executed when starting the
        single-user server. The image path and notebook server args will be
        concatenated to the end of this command. This is a good place to
        specify any site-specific options that should be applied to all users,
        such as default mounts.
        """
    ).tag(config=True)

    notebook_cmd = Command(
        ['jupyterhub-singleuser'],
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup
        script command. Extra arguments, other than this path, should be
        provided via `args`.
        """
    ).tag(config=True)

    default_image_path = Unicode(
        '',
        help="""
        Absolute POSIX filepath to Singularity image that will be used to
        execute the notebook server spawn command, if another path is not
        specified by the user.
        """
    ).tag(config=True)

    pull_from_url = Bool(
        False,
        help="""
        If set to True, the user should be presented with URI specification
        options, and the spawner should first pull a new image from the
        specified shub or docker URI prior to running the notebook command.
        In this configuration, the `user_image_path` will specify where the
        new container will be created.
        """
    ).tag(config=True)

    default_image_url = Unicode(
        'docker://jupyter/base-notebook',
        help="""
        Singularity Hub or Docker URI from which the notebook image will be
        pulled, if no other URI is specified by the user but the _pull_
        option has been selected.
        """
    ).tag(config=True)

    options_form = Unicode()

    # HTML template rendered into the spawn options form; the named inputs
    # (pull_from_url, user_image_url, user_image_path) feed options_from_form.
    form_template = Unicode(
        """
        <div class="checkbox">
            <label>
                <input id="pull-checkbox" type="checkbox" value="pull" name="pull_from_url">Pull from URL
            </label>
        </div>
        <div id="url-group" class="form-group" hidden>
            <label for="user_image_url">
                Specify the image URL to pull from:
            </label>
            <input class="form-control" name="user_image_url" value="{default_image_url}">
        </div>
        <div class="form-group">
            <label id="path-label" for="user_image_path">
                Specify the Singularity image to use (absolute filepath):
            </label>
            <input class="form-control" name="user_image_path" value="{default_image_path}" required autofocus>
        </div>
        """
    )

    def format_default_image_path(self):
        """Format the image path template string."""
        format_options = dict(username=self.user.escaped_name)
        default_image_path = self.default_image_path.format(**format_options)
        return default_image_path

    @default('options_form')
    def _options_form(self):
        """Render the options form."""
        default_image_path = self.format_default_image_path()
        format_options = dict(default_image_path=default_image_path,
                              default_image_url=self.default_image_url)
        options_form = self.form_template.format(**format_options)
        return JS_SCRIPT + options_form

    def options_from_form(self, form_data):
        """Get data from options form input fields."""
        user_image_path = form_data.get('user_image_path', None)
        user_image_url = form_data.get('user_image_url', None)
        pull_from_url = form_data.get('pull_from_url', False)

        return dict(user_image_path=user_image_path,
                    user_image_url=user_image_url,
                    pull_from_url=pull_from_url)

    def get_image_url(self):
        """Get image URL to pull image from user options or default.

        Values are single-element lists (HTML form data convention).
        """
        default_image_url = self.default_image_url
        image_url = self.user_options.get('user_image_url',
                                          [default_image_url])
        return image_url

    def get_image_path(self):
        """Get image filepath specified in user options else default.

        Values are single-element lists (HTML form data convention).
        """
        default_image_path = self.format_default_image_path()
        image_path = self.user_options.get('user_image_path',
                                           [default_image_path])
        return image_path

    @gen.coroutine
    def pull_image(self, image_url):
        """Pull the singularity image to specified image path."""
        image_path = self.get_image_path()
        s = Singularity()
        container_path = s.pull(image_url[0], image_name=image_path[0])
        # BUG FIX: previously returned Unicode(container_path) -- a
        # traitlets descriptor instance, not a string. Return the path.
        return container_path

    def _build_cmd(self):
        """Assemble `singularity ... exec <image> jupyterhub-singleuser`."""
        image_path = self.get_image_path()
        cmd = []
        cmd.extend(self.singularity_cmd)
        cmd.extend(image_path)
        cmd.extend(self.notebook_cmd)
        return cmd

    @property
    def cmd(self):
        """Full spawn command; shadows the inherited `cmd` trait."""
        return self._build_cmd()

    @gen.coroutine
    def start(self):
        """
        Start the single-user server in the Singularity container specified
        by image path, pulling from docker or shub first if the pull option
        is selected.
        """
        pull_from_url = self.user_options.get('pull_from_url', False)
        if pull_from_url:
            image_url = self.get_image_url()
            # BUG FIX: pull_image is a coroutine and was called without
            # yield, so the pull never actually ran.
            yield self.pull_image(image_url)
        # BUG FIX: super().start() is a coroutine whose (ip, port) result
        # was neither awaited nor returned, so the server never registered.
        ip_port = yield super(SingularitySpawner, self).start()
        return ip_port
class C(HasTraits): cmd = Command('default command') cmd2 = Command(['default_cmd'])
class SingularitySpawner(LocalProcessSpawner):
    """SingularitySpawner - extends the default LocalProcessSpawner to allow for:

    1) User-specification of a singularity image via the Spawner options form
    2) Spawning a Notebook server within a Singularity container
    """

    singularity_cmd = Command(
        ['/opt/singularity/3.3.0/bin/singularity', '-d', 'exec'],
        help="""
        This is the singularity command that will be executed when starting the
        single-user server. The image path and notebook server args will be
        concatenated to the end of this command. This is a good place to
        specify any site-specific options that should be applied to all users,
        such as default mounts.
        """
    ).tag(config=True)

    notebook_cmd = Command(
        ['jupyterhub-singleuser'],
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup
        script command. Extra arguments, other than this path, should be
        provided via `args`.
        """
    ).tag(config=True)

    imagename = Unicode(
        '',
        help="""
        Absolute POSIX filepath to Singularity image that will be used to
        execute the notebook server spawn command, if another path is not
        specified by the user.
        """
    ).tag(config=True)

    default_image_path = Unicode(
        '',
        help="""
        Absolute POSIX filepath to Singularity image that will be used to
        execute the notebook server spawn command, if another path is not
        specified by the user.
        """
    ).tag(config=True)

    def _build_cmd(self):
        """Assemble `singularity -d exec <image> jupyterhub-singleuser`."""
        cmd = []
        cmd.extend(self.singularity_cmd)
        cmd.extend([self.default_image_path])
        cmd.extend(self.notebook_cmd)
        return cmd

    @property
    def cmd(self):
        """Full spawn command; shadows the inherited `cmd` trait."""
        return self._build_cmd()

    def get_env(self):
        """Get the complete set of environment variables to be set in the
        spawned process."""
        env = super().get_env()
        env = self.user_env(env)
        env['CONTAINER_IMAGE'] = str(self.imagename)
        # Per-user, per-image scratch dir, bind-mounted as /tmp in the
        # container.
        tmpdirpath = os.path.join('/tmp', self.user.name, self.imagename)
        if not os.path.exists(tmpdirpath):
            os.makedirs(tmpdirpath, exist_ok=True)
            shutil.chown(tmpdirpath, user=str(self.user.name),
                         group=str(self.user.name))
            os.chmod(tmpdirpath, 0o755)
        env['SINGULARITY_BINDPATH'] = tmpdirpath + ':/tmp'
        # Read BIOJHUBHOME from the user's login environment.
        # SECURITY FIX: the username was previously interpolated into a
        # `shell=True` pipeline (`sudo ... env | grep | cut`); use a list
        # argv and parse the output in Python instead.
        env_dump = subprocess.check_output(
            ['sudo', '-Hiu', str(self.user.name), 'env'])
        biojhubhome = ''
        for line in env_dump.decode('utf-8').splitlines():
            if line.startswith('BIOJHUBHOME='):
                biojhubhome = line.split('=', 1)[1].rstrip()
                break
        # BUG FIX: was `if biojhubhome is "":` -- identity comparison with
        # a string literal is unreliable (and a SyntaxWarning); use ==.
        if biojhubhome == "":
            biojhubhome = ('/data/users/' + str(self.user.name) +
                           '/container_cache/' + str(self.imagename))
        else:
            biojhubhome = biojhubhome + '/' + str(self.imagename)
        if not os.path.isdir(biojhubhome):
            # Create the directory as the user (list argv, no shell).
            subprocess.call(['sudo', '-u', str(self.user.name),
                             'mkdir', '-p', str(biojhubhome)])
            subprocess.call(['sudo', '-u', str(self.user.name),
                             'chmod', '755', str(biojhubhome)])
        env['SINGULARITY_HOME'] = biojhubhome + ":/home/jovyan"
        return env

    async def start(self):
        """Start the single-user server."""
        self.port = random_port()
        cmd = []
        env = self.get_env()

        cmd.extend(self.cmd)
        cmd.extend(self.get_args())

        if self.shell_cmd:
            # using shell_cmd (e.g. bash -c),
            # add our cmd list as the last (single) argument:
            cmd = self.shell_cmd + [' '.join(pipes.quote(s) for s in cmd)]

        self.log.info("Spawning %s", ' '.join(pipes.quote(s) for s in cmd))
        popen_kwargs = dict(
            preexec_fn=self.make_preexec_fn(self.user.name),
            start_new_session=True,  # don't forward signals
        )
        popen_kwargs.update(self.popen_kwargs)
        # don't let user config override env
        popen_kwargs['env'] = env
        try:
            self.proc = Popen(cmd, **popen_kwargs)
        except PermissionError:
            # use which to get abspath
            script = shutil.which(cmd[0]) or cmd[0]
            self.log.error(
                "Permission denied trying to run %r. Does %s have access to this file?",
                script, self.user.name,
            )
            raise

        self.pid = self.proc.pid

        if self.__class__ is not LocalProcessSpawner:
            # subclasses may not pass through return value of super().start,
            # relying on deprecated 0.6 way of setting ip, port,
            # so keep a redundant copy here for now.
            # A deprecation warning will be shown if the subclass
            # does not return ip, port.
            if self.ip:
                self.server.ip = self.ip
            self.server.port = self.port
            self.db.commit()
        return (self.ip or '127.0.0.1', self.port)