def init_mck_secrets(c):
    auth_type = get_config('auth.type')

    if auth_type == "custom":
        full_class_name = get_config('auth.custom.className')
        c.JupyterHub.authenticator_class = full_class_name
        auth_class_name = full_class_name.rsplit('.', 1)[-1]
        auth_config = c[auth_class_name]

        current_config = get_config('auth.custom.config', {})

        new_config = {k: get_value(v) for k, v in current_config.items()}
        auth_config.update(new_config)

    # cookie secret
    c.JupyterHub.cookie_secret = get_value(get_config("custom.hub_cookie_secret")).encode()

    # proxy token
    # The proxy secret can't be read in using get_config(); setting the auth
    # token like this will break things unless the proxy pod is also using
    # the same auth token.
    # c.ConfigurableHTTPProxy.auth_token = get_value("mck://proxy_secret")


    # single-user environment (c.KubeSpawner.environment)
    single_user_env = get_config("singleuser.extraEnv", {})
    c.KubeSpawner.environment = {k: get_value(v) for k, v in single_user_env.items()}
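
# `get_value` is not defined in this snippet; it is applied to config values and
# (in the commented-out line above) to "mck://..." references. A minimal sketch,
# assuming it resolves "mck://<name>" secret references and returns anything else
# unchanged -- `fetch_mck_secret` is a hypothetical lookup helper, not part of z2jh:
def get_value(value):
    if isinstance(value, str) and value.startswith("mck://"):
        return fetch_mck_secret(value[len("mck://"):])  # hypothetical secret lookup
    return value
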
def volume_stopping_tag(spawner):
    import datetime

    import boto3

    import z2jh

    pvc_name = spawner.pvc_name
    cluster_name = z2jh.get_config('custom.CLUSTER_NAME')
    az_name = z2jh.get_config('custom.AZ_NAME')
    region_name = az_name[:-1]

    session = boto3.Session(region_name=region_name)
    ec2 = session.client('ec2')

    print(
        f"Updating stopping tag for pvc '{pvc_name}' in cluster '{cluster_name}'..."
    )

    vol = ec2.describe_volumes(
        Filters=[{
            'Name': 'tag:kubernetes.io/created-for/pvc/name',
            'Values': [pvc_name]
        }, {
            'Name': 'tag:kubernetes.io/cluster/{0}'.format(cluster_name),
            'Values': ['owned']
        }])

    vol = vol['Volumes']

    if len(vol) > 1:
        raise Exception(
            "\n ***** More than one volume for pvc: {0}".format(pvc_name))

    vol = vol[0] if vol else None

    if vol:
        ec2.create_tags(DryRun=False,
                        Resources=[vol['VolumeId']],
                        Tags=[
                            {
                                'Key': 'jupyter-volume-stopping-time',
                                'Value': '{0}'.format(datetime.datetime.now())
                            },
                        ])
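

# The registration of this hook is not shown in the snippet. A minimal sketch,
# assuming it runs after each single-user server stops (post_stop_hook receives
# the spawner instance, matching the signature above):
c.KubeSpawner.post_stop_hook = volume_stopping_tag
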
Example #3
 def __init__(self, **kwargs):
     super(PersistentBinderSpawner, self).__init__(**kwargs)
     # get default_project from custom config of z2jh chart (`binderhub.jupyterhub.custom`)
     # https://zero-to-jupyterhub.readthedocs.io/en/latest/administrator/advanced.html#custom-configuration
     default_project = z2jh.get_config('custom.default_project')
     display_name = self.url_to_display_name(default_project["repo_url"])
     # default_project is only to use when first login
     self.default_project = [default_project["repo_url"], '', default_project["ref"], display_name, 'never']
 def __init__(self, **kwargs):
     super().__init__(**kwargs)
     # get default_project from custom config of z2jh chart (`binderhub.jupyterhub.custom`)
     # https://zero-to-jupyterhub.readthedocs.io/en/latest/administrator/advanced.html#custom-configuration
     default_project = z2jh.get_config("custom.default_project")
     display_name = self.url_to_display_name(default_project["repo_url"])
     # default_project is only to use when first login
     self.default_project = {
         "repo_url": default_project["repo_url"],
         "image": "",
         "ref": default_project["ref"],
         "display_name": display_name,
         "last_used": "never",
     }
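
# Note: both variants above expect `custom.default_project` (set under
# binderhub.jupyterhub.custom in the chart values) to provide at least a
# "repo_url" and a "ref" key; any concrete values are deployment-specific.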
Example #5
    def pre_spawn_start(self, user, spawner):
        """Pass upstream_token to spawner via environment variable"""
        auth_state = yield user.get_auth_state()
        if not auth_state:
            raise Exception('auth state must be enabled')

        spawner.environment.update(
            {'TZ': auth_state['oauth_user'].get('timezone', 'UTC')})

        if spawner.extra_resource_limits.get('nvidia.com/gpu', 0) == 0:
            spawner.environment.update({'NVIDIA_VISIBLE_DEVICES': 'none'})

        start_notebook_config = get_config('custom.startNotebookConfigMap')
        if start_notebook_config:
            spawner.volumes.append({
                'name': 'start-notebook-d',
                'config_map': {
                    'name': start_notebook_config
                }
            })
            spawner.volume_mounts.append({
                'mountPath': '/usr/local/bin/start-notebook.d',
                'name': 'start-notebook-d'
            })
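
# pre_spawn_start above requires auth_state; with a stock JupyterHub deployment
# that normally means enabling persisted (encrypted) auth state. A hedged sketch,
# not part of the original snippet:
c.Authenticator.enable_auth_state = True  # also set JUPYTERHUB_CRYPT_KEY in the hub env
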
# Connect to a proxy running in a different pod
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(
    os.environ['PROXY_API_SERVICE_HOST'],
    int(os.environ['PROXY_API_SERVICE_PORT']))
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately setup
# This isn't the best named setting :D
c.JupyterHub.last_activity_interval = 60

# Max number of servers that can be spawning at any one time
c.JupyterHub.concurrent_spawn_limit = get_config('hub.concurrent-spawn-limit')

active_server_limit = get_config('hub.active-server-limit', None)

if active_server_limit is not None:
    c.JupyterHub.active_server_limit = int(active_server_limit)

c.JupyterHub.ip = os.environ['PROXY_PUBLIC_SERVICE_HOST']
c.JupyterHub.port = int(os.environ['PROXY_PUBLIC_SERVICE_PORT'])

# the hub should listen on all interfaces, so the proxy can access it
c.JupyterHub.hub_ip = '0.0.0.0'

c.KubeSpawner.namespace = os.environ.get('POD_NAMESPACE', 'default')

c.KubeSpawner.start_timeout = get_config('singleuser.start-timeout')
    def start(self):
        """Starts the user's pod with `user_options`, which is set by binder.
        Before starting the notebook server, starts an `initContainer` which
        first gets the information of projects to delete from state["deleted_projects"] (`spawners` table),
        then deletes these projects on disk (user storage),
        and then copies content of image's home dir into project dir if project dir doesn't exist.

        Starts the notebook server with 2 mounts,
        first one is the user storage (where all projects take place), which is mounted to `/projects` and
        second one is currently launched project's dir on the user storage, which is mounted to `/home/jovyan`.

        Note: init and notebooks containers shares a volume (user storage), that's how project content, which is
        copied by init container, is also available to notebook container.
        """
        # clear these attributes so we don't save wrong values in state if an error happens
        for attr in ('repo_url', 'ref', 'image'):
            self.__dict__.pop(attr, None)

        # get image spec from user_options
        if 'image' in self.user_options and \
           'repo_url' in self.user_options and \
           'token' in self.user_options:
            # the binder service sets the image spec via user options
            # user_options is saved in the database, so even if the user deletes all projects,
            # user_options for the last launched repo stays in the database
            # NOTE: the user can also pass arbitrary options through the API (without using binder)
            self.image = self.user_options['image']
            self.ref = self.image.split(':')[-1]
            # repo_url is generated in binderhub by repo providers
            self.repo_url = self.user_options['repo_url']
            # strip .git at the end
            if self.repo_url.endswith('.git'):
                self.repo_url = self.repo_url[:-4]
        else:
            # the user never launched a repo before (user_options in the database is empty)
            # and is trying to start the server via the spawn url;
            # normally this shouldn't happen. Ideally we would display a message to the user,
            # but we can't, and raising an error causes the hub to restart,
            # so (as a workaround) launch a default repo until we handle this better FIXME
            projects = self.get_state_field('projects')
            if projects and projects[-1].get("image"):
                # the user has a valid previous project, so relaunch the most recent one
                self.repo_url = projects[-1]["repo_url"]
                self.image = projects[-1]["image"]
                self.ref = projects[-1]["ref"]
                self.log.warning(
                    f"Project '{self.repo_url}' with '{self.image}' doesn't exist in user_options."
                )
            else:
                msg = f"User ({self.user.name}) is trying to start the server via spawn url."
                # self.handler.redirect("/hub/home")
                # raise Exception(msg)
                self.log.info(msg)
                self.repo_url = "https://github.com/gesiscss/persistent_binderhub"
                self.image = "gesiscss/binder-gesiscss-2dpersistent-5fbinderhub-ab107f:0.2.0-n652"
                self.ref = self.image.split(':')[-1]
        self.log.info(
            f"User ({self.user.name}) is launching '{self.repo_url}' project with '{self.image}'."
        )

        # prepare the initContainer
        # NOTE: first initContainer runs and when it is done, then notebook container runs
        # https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
        # https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-initialization/#create-a-pod-that-has-an-init-container
        # https://github.com/jupyterhub/kubespawner/blob/v0.8.1/kubespawner/spawner.py#L638-L664
        mount_path = '/projects/'
        # first it deletes projects on disk (if there are any to delete)
        # get list of projects to delete from disk before spawn in initContainer
        deleted_projects = self.get_state_field('deleted_projects')
        if deleted_projects:
            delete_cmd = f"rm -rf {' '.join([join(mount_path, self.url_to_dir(d)) for d in deleted_projects])}"
            self.log.info(
                f"Following projects will be deleted for user ({self.user.name}): {deleted_projects}"
            )
        else:
            delete_cmd = ""
        # then copy the image's home dir (repo content after the r2d process)
        # into the project's dir on disk (if project_path doesn't exist on the persistent disk)
        project_dir = self.url_to_dir(self.repo_url)
        project_path = join(mount_path, project_dir)
        copy_cmd = f"if [ -d {project_path} ]; " \
                   f"then echo 'directory {project_path} exists'; " \
                   f"elif [ -L {project_path} ]; " \
                   f"then echo '{project_path} is a symlink'; " \
                   f"else mkdir {project_path} && cp -a ~/. {project_path}; fi"
        init_container_cmds = [delete_cmd, copy_cmd] if delete_cmd else [copy_cmd]
        command = ["/bin/sh", "-c", " && ".join(init_container_cmds)]
        self.log.debug(
            f"Following command will be executed for user ({self.user.name}): {command}"
        )
        projects_volume_mount = {
            'name': self.volumes[0]['name'],
            'mountPath': mount_path
        }
        # NOTE: if a binder "start" config is defined
        #  (https://mybinder.readthedocs.io/en/latest/config_files.html#start-run-code-before-the-user-sessions-starts)
        #  and the start command changes the content, the initContainer misses that change,
        #  because the start command runs as an ENTRYPOINT and the initContainer's command overrides it.
        #  But the start command is still executed in the notebook container (we don't define a custom
        #  command for it), so the change takes place there and the user sees no problem.
        self.init_containers.append({
            "name": "project-manager",
            "image": self.image,
            "command": command,
            # volumes is already defined for notebook container (self.volumes)
            "volume_mounts": [projects_volume_mount],
        })

        # notebook container (user server)
        # mount all projects (complete user disk) to /projects
        # first remove any existing volume mounts at /projects; this mount path should be unique.
        # Normally we shouldn't need this, but duplicates can appear after a spawn error,
        # for example a timeout due to a long docker pull (of a notebook server image).
        # (rebuild the list instead of deleting while iterating, which would skip entries)
        self.volume_mounts = [
            v_m for v_m in self.volume_mounts
            if v_m['mountPath'] != projects_volume_mount['mountPath']
        ]

        # only mount /projects in the user server if mounting all projects is enabled
        if z2jh.get_config('custom.mount_all_projects'):
            self.volume_mounts.append(projects_volume_mount)

        # mountPath is /home/jovyan, this is set in z2jh helm chart values.yaml
        # mount_path = "~/"
        # mount_path = "$(HOME)"
        # self.volume_mounts[0]['mountPath'] = mount_path
        # https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath
        # mount only project_path to home
        self.volume_mounts[0]['subPath'] = project_dir

        self.reset_deleted_projects = True
        return super().start()
Example #8
for trait, cfg_key in (
    ('start_timeout', 'start-timeout'),
    ('image_pull_policy', 'image-pull-policy'),
    ('image_pull_secrets', 'image-pull-secret-name'),
    ('events_enabled', 'events'),
    ('extra_labels', 'extra-labels'),
    ('extra_annotations', 'extra-annotations'),
    ('uid', 'uid'),
    ('fs_gid', 'fs-gid'),
    ('service_account', 'service-account-name'),
    ('scheduler_name', 'scheduler-name'),
    ('node_selector', 'node-selector'),
):
    set_config_if_not_none(c.KubeSpawner, trait, 'singleuser.' + cfg_key)

c.KubeSpawner.image_spec = get_config('singleuser.image-spec')
# Configure dynamically provisioning pvc
storage_type = get_config('singleuser.storage.type')
if storage_type == 'dynamic':
    pvc_name_template = get_config('singleuser.storage.dynamic.pvc-name-template')
    c.KubeSpawner.pvc_name_template = pvc_name_template
    volume_name_template = get_config('singleuser.storage.dynamic.volume-name-template')
    c.KubeSpawner.storage_pvc_ensure = True
    set_config_if_not_none(c.KubeSpawner, 'storage_class', 'singleuser.storage.dynamic.storage-class')
    set_config_if_not_none(c.KubeSpawner, 'storage_access_modes', 'singleuser.storage.dynamic.storage-access-modes')
    set_config_if_not_none(c.KubeSpawner, 'storage_capacity', 'singleuser.storage.capacity')

    # Add volumes to singleuser pods
    c.KubeSpawner.volumes = [
        {
            'name': volume_name_template,
Example #9
c.JupyterHub.tornado_settings = {
    'slow_spawn_timeout': 0,
}


def camelCaseify(s):
    """convert snake_case to camelCase

    For the common case where some_value is set from someValue
    so we don't have to specify the name twice.
    """
    return re.sub(r"_([a-z])", lambda m: m.group(1).upper(), s)
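
# e.g. camelCaseify("active_server_limit") == "activeServerLimit", so a trait name
# can be mapped to its Helm-chart config key without spelling out both forms.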


# configure the hub db connection
db_type = get_config('hub.db.type')
if db_type == 'sqlite-pvc':
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")

for trait, cfg_key in (
    # Max number of servers that can be spawning at any one time
    ('concurrent_spawn_limit', None),
    # Max number of servers to be running at one time
    ('active_server_limit', None),
    # base url prefix
    ('base_url', None),
    ('allow_named_servers', None),
Example #10
# Connect to a proxy running in a different pod
api_proxy_service_name = os.environ["PROXY_API_SERVICE_NAME"]
c.ConfigurableHTTPProxy.api_url = 'http://{}:{}'.format(
    os.environ[api_proxy_service_name + "_HOST"],
    int(os.environ[api_proxy_service_name + "_PORT"]))
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately setup
# This isn't the best named setting :D
c.JupyterHub.last_activity_interval = 60

# Max number of servers that can be spawning at any one time
c.JupyterHub.concurrent_spawn_limit = get_config('hub.concurrent-spawn-limit')

# Max number of consecutive failures before the Hub restarts itself
# requires jupyterhub 0.9.2
c.Spawner.consecutive_failure_limit = get_config(
    'hub.consecutive-failure-limit', 0)

active_server_limit = get_config('hub.active-server-limit', None)
if active_server_limit is not None:
    c.JupyterHub.active_server_limit = int(active_server_limit)

public_proxy_service_name = os.environ["PROXY_PUBLIC_SERVICE_NAME"]
c.JupyterHub.ip = os.environ[public_proxy_service_name + "_HOST"]
c.JupyterHub.port = int(os.environ[public_proxy_service_name + "_PORT"])

# the hub should listen on all interfaces, so the proxy can access it
Example #11
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately setup
c.JupyterHub.last_activity_interval = 60

# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
    "slow_spawn_timeout": 0,
}


# configure the hub db connection
db_type = get_config("hub.db.type")
if db_type == "sqlite-pvc":
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")
db_password = get_secret_value("hub.db.password", None)
if db_password is not None:
    if db_type == "mysql":
        os.environ["MYSQL_PWD"] = db_password
    elif db_type == "postgres":
        os.environ["PGPASSWORD"] = db_password
    else:
        print(f"Warning: hub.db.password is ignored for hub.db.type={db_type}")
Example #12
oidc_client_secret = os.environ.get('KC_CLIENT_SECRET', '')
scope_required = get_primehub_config('scopeRequired')
role_prefix = get_primehub_config('keycloak.rolePrefix', "")
base_url = get_primehub_config('baseUrl', "/")
enable_feature_kernel_gateway = get_primehub_config('kernelGateway', "")
jupyterhub_template_path = '/etc/jupyterhub/templates'
start_notebook_config = get_primehub_config('startNotebookConfigMap')
template_loader = Environment(
    loader=FileSystemLoader(jupyterhub_template_path))

if role_prefix:
    role_prefix += ':'

graphql_endpoint = get_primehub_config('graphqlEndpoint')
graphql_secret = get_primehub_config('graphqlSecret')
fs_group_id = get_config('singleuser.fsGid')

autoscaling_enabled = get_config('scheduling.userScheduler.enabled')

phfs_enabled = get_primehub_config('phfsEnabled', False)
phfs_pvc = get_primehub_config('phfsPVC', '')

# Support old group volume convention.
support_old_group_volume_convention = os.environ.get(
    'SUPPORT_OLD_GROUP_VOLUME_CONVENTION', 'false') == "true"

# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed
# configuration.api_key_prefix['authorization'] = 'Bearer'

BACKEND_API_UNAVAILABLE = 'API_UNAVAILABLE'
GRAPHQL_LAUNCH_CONTEXT_QUERY = '''query ($id: ID!) {
Example #13
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately setup
c.JupyterHub.last_activity_interval = 60

# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
    "slow_spawn_timeout": 0,
}


# configure the hub db connection
db_type = get_config("hub.db.type")
if db_type == "sqlite-pvc":
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")


# c.JupyterHub configuration from Helm chart's configmap
for trait, cfg_key in (
    ("concurrent_spawn_limit", None),
    ("active_server_limit", None),
    ("base_url", None),
    # ('cookie_secret', None),  # requires a Hex -> Byte transformation
    ("allow_named_servers", None),
Example #14
scope_required = get_primehub_config('scopeRequired')
role_prefix = get_primehub_config('keycloak.rolePrefix', "")
base_url = get_primehub_config('baseUrl', "/")
enable_feature_kernel_gateway = get_primehub_config('kernelGateway', "")
enable_feature_ssh_server = get_primehub_config('sshServer.enabled', False)
jupyterhub_template_path = '/etc/jupyterhub/templates'
start_notebook_config = get_primehub_config('startNotebookConfigMap')
template_loader = Environment(
    loader=FileSystemLoader(jupyterhub_template_path))

if role_prefix:
    role_prefix += ':'

graphql_endpoint = get_primehub_config('graphqlEndpoint')
graphql_secret = os.environ.get('GRAPHQL_SHARED_SECRET', get_primehub_config('graphqlSecret'))
fs_group_id = get_config('singleuser.fsGid')

autoscaling_enabled = get_config('scheduling.userScheduler.enabled')

phfs_enabled = get_primehub_config('phfsEnabled', False)
phfs_pvc = get_primehub_config('phfsPVC', '')

grantSudo = get_primehub_config('grantSudo', True)

# Support old group volume convention.
support_old_group_volume_convention = os.environ.get(
    'SUPPORT_OLD_GROUP_VOLUME_CONVENTION', 'false') == "true"

# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed
# configuration.api_key_prefix['authorization'] = 'Bearer'
Example #15
c.JupyterHub.tornado_settings = {
    'slow_spawn_timeout': 0,
}


def camelCaseify(s):
    """convert snake_case to camelCase

    For the common case where some_value is set from someValue
    so we don't have to specify the name twice.
    """
    return re.sub(r"_([a-z])", lambda m: m.group(1).upper(), s)


# configure the hub db connection
db_type = get_config('hub.db.type')
if db_type == 'sqlite-pvc':
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")


for trait, cfg_key in (
    # Max number of servers that can be spawning at any one time
    ('concurrent_spawn_limit', None),
    # Max number of servers to be running at one time
    ('active_server_limit', None),
    # base url prefix
    ('base_url', None),
Example #16
import os
import urllib
from z2jh import get_config
from oauthenticator.generic import GenericOAuthenticator, OAuthLoginHandler
from tornado.auth import OAuth2Mixin
from tornado import gen

from jupyterhub.handlers import LogoutHandler

keycloak_url = os.environ['PRIMEHUB_KEYCLOAK_URL']
keycloak_realm = os.environ['PRIMEHUB_KEYCLOAK_REALM']
keycloak_client_id = os.environ['PRIMEHUB_KEYCLOAK_CLIENT_ID']
keycloak_client_secret = os.environ['PRIMEHUB_KEYCLOAK_CLIENT_SECRET']

scope_required = get_config('custom.scopeRequired')


class KeycloakMixin(OAuth2Mixin):
    _OAUTH_AUTHORIZE_URL = "%s/auth/realms/%s/protocol/openid-connect/auth" % (
        keycloak_url, keycloak_realm)
    _OAUTH_ACCESS_TOKEN_URL = "%s/auth/realms/%s/protocol/openid-connect/token" % (
        keycloak_url, keycloak_realm)


class PrimeHubLogoutHandler(LogoutHandler):
    kc_logout_url = '%s/auth/realms/%s/protocol/openid-connect/logout' % (
        keycloak_url, keycloak_realm)

    @gen.coroutine
    def get(self):
        # redirect to keycloak logout url and redirect back with kc=true parameters
        # then proceed with the original logout method.
def volume_from_snapshot(spawner):

    import re

    import boto3
    import yaml
    from kubernetes import client as k8s_client
    from kubernetes import config as k8s_config
    from kubernetes.client.rest import ApiException

    import z2jh

    k8s_config.load_incluster_config()
    api = k8s_client.CoreV1Api()

    username = spawner.user.name
    pvc_name = spawner.pvc_name
    namespace = 'jupyter'
    cluster_name = z2jh.get_config('custom.CLUSTER_NAME')
    cost_tag_key = z2jh.get_config('custom.COST_TAG_KEY')
    cost_tag_value = z2jh.get_config('custom.COST_TAG_VALUE')
    az_name = z2jh.get_config('custom.AZ_NAME')
    vol_size = spawner.storage_capacity
    spawn_pvc = spawner.get_pvc_manifest()
    region_name = az_name[:-1]

    print(
        f"Spawner gives storage as {vol_size}. If restoring from a snapshot, the size may be different."
    )

    alpha = " ".join(re.findall("[a-zA-Z]+", vol_size)).lower()
    number = int(" ".join(re.findall("[0-9]+", vol_size)))

    possible_units = {
        "ei": 2**60,
        "pi": 2**50,
        "ti": 2**40,
        "gi": 2**30,
        "mi": 2**20,
        "ki": 2**10,
        "e": 10**18,
        "p": 10**15,
        "t": 10**12,
        "g": 10**9,
        "m": 10**6,
        "k": 10**3,
        "": 1
    }

    vol_size = number * possible_units[alpha]

    # Volume needs to be in GiB (an int without the label)
    vol_size = int(vol_size * 2**-30)
    # never request less than 1 GiB (the int conversion above can round down to 0)
    if vol_size < 1:
        vol_size = 1
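
    # e.g. a storage_capacity of "10Gi" parses to alpha="gi", number=10,
    # i.e. 10 * 2**30 bytes, which converts back to vol_size = 10 (GiB).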

    session = boto3.Session(region_name=region_name)
    ec2 = session.client('ec2')

    pvcs = api.list_namespaced_persistent_volume_claim(namespace=namespace,
                                                       watch=False)

    has_pvc = False
    for items in pvcs.items:
        if items.metadata.name == pvc_name:
            print(
                "PVC '{pvc_name}' exists! Therefore a volume should have already been assigned to user '{username}'."
                .format(pvc_name=pvc_name, username=username))
            has_pvc = True

    if not has_pvc:
        print(
            "PVC '{pvc_name}' does not exist. Therefore a volume will have to be created for user '{username}'."
            .format(pvc_name=pvc_name, username=username))

        # Does the user have any snapshots?
        snap = ec2.describe_snapshots(Filters=[{
            'Name': 'tag:kubernetes.io/created-for/pvc/name',
            'Values': [pvc_name]
        }, {
            'Name':
            'tag:kubernetes.io/cluster/{cluster_name}'.format(
                cluster_name=cluster_name),
            'Values': ['owned']
        }, {
            'Name': 'status',
            'Values': ['completed']
        }],
                                      OwnerIds=['self'])
        snap = snap['Snapshots']

        if len(snap) > 1:
            snap = sorted(snap, key=lambda s: s['StartTime'], reverse=True)
            print(
                f"\nWARNING ***** More than one snapshot found for pvc: {pvc_name}. Claiming the latest one: \n{snap[0]}."
            )
        elif len(snap) == 0:
            print(f"No snapshot found that matched pvc '{pvc_name}'")
            snap = [None]

        snapshot = snap[0]

        if snapshot:
            # Guarantee that the volume never shrinks if the spawner's volume is smaller than the snapshot
            if snapshot['VolumeSize'] > vol_size:
                vol_size = snapshot['VolumeSize']

            print("Creating volume from snapshot...")
            vol = ec2.create_volume(
                AvailabilityZone=az_name,
                Encrypted=False,
                Size=vol_size,
                SnapshotId=snapshot['SnapshotId'],
                VolumeType='gp2',
                DryRun=False,
                TagSpecifications=[{
                    'ResourceType': 'volume',
                    'Tags': [
                        {'Key': 'Name',
                         'Value': '{username}-{cluster_name}'.format(
                             cluster_name=cluster_name, username=username)},
                        {'Key': 'kubernetes.io/cluster/{cluster_name}'.format(
                             cluster_name=cluster_name),
                         'Value': 'owned'},
                        {'Key': 'kubernetes.io/created-for/pvc/namespace',
                         'Value': namespace},
                        {'Key': 'kubernetes.io/created-for/pvc/name',
                         'Value': pvc_name},
                        {'Key': 'RestoredFromSnapshot',
                         'Value': 'True'},
                    ],
                }])
            vol_id = vol['VolumeId']
            print(f"Volume {vol_id} created.")

            this_val = get_tag_value(snapshot, 'jupyter-volume-stopping-time')
            if this_val:
                ec2.create_tags(DryRun=False,
                                Resources=[vol_id],
                                Tags=[
                                    {
                                        'Key': 'jupyter-volume-stopping-time',
                                        'Value': this_val
                                    },
                                ])

            # If do-not-delete tag was present in snapshot, add to volume tags
            if get_tag_value(snapshot, 'do-not-delete'):
                ec2.create_tags(DryRun=False,
                                Resources=[vol_id],
                                Tags=[
                                    {
                                        'Key': 'do-not-delete',
                                        'Value': 'True'
                                    },
                                ])

            # If the billing tag is present in the snapshot, add to volume tags
            # If the tag doesn't exist in the snapshot, the default is `cost_tag_value`
            this_val = get_tag_value(snapshot, cost_tag_key)
            if not this_val:
                this_val = cost_tag_value
            ec2.create_tags(DryRun=False,
                            Resources=[vol_id],
                            Tags=[
                                {
                                    'Key': cost_tag_key,
                                    'Value': this_val
                                },
                            ])

            annotations = spawn_pvc.metadata.annotations
            labels = spawn_pvc.metadata.labels

            # Get PVC manifest
            with open("/home/jovyan/hooks/pvc.yaml", mode='r') as f:
                pvc_yaml = f.read().format(annotations=annotations,
                                           cluster_name=cluster_name,
                                           labels=labels,
                                           name=pvc_name,
                                           namespace=namespace,
                                           vol_size=vol_size,
                                           vol_id=vol_id)

            pvc_manifest = yaml.safe_load(pvc_yaml)

            # Get PV manifest
            with open("/home/jovyan/hooks/pv.yaml", mode='r') as f:
                pv_yaml = f.read()

            annotations = pvc_manifest['metadata']['annotations']

            pv_yaml = pv_yaml.format(annotations=annotations,
                                     cluster_name=cluster_name,
                                     region_name=region_name,
                                     az_name=az_name,
                                     pvc_name=pvc_name,
                                     namespace=namespace,
                                     vol_id=vol_id,
                                     storage=pvc_manifest['spec']['resources']
                                     ['requests']['storage'])

            pv_manifest = yaml.safe_load(pv_yaml)

            # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CoreV1Api.md#create_persistent_volume
            print("Creating persistent volume...")
            try:
                api.create_persistent_volume(body=pv_manifest)
            except ApiException as e:
                if e.status == 409:
                    print(
                        f"PV {vol_id} already exists, so did not create a new PV."
                    )
                else:
                    raise

            # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CoreV1Api.md#create_namespaced_persistent_volume_claim
            print("Creating persistent volume claim...")
            try:
                api.create_namespaced_persistent_volume_claim(
                    body=pvc_manifest, namespace=namespace)
            except ApiException as e:
                if e.status == 409:
                    print(
                        f"PVC {pvc_name} already exists, so did not create a new PVC."
                    )
                else:
                    raise
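

# `get_tag_value` is used above but not defined in this snippet. A minimal sketch,
# assuming it looks a tag up in the standard EC2 Key/Value tag list of a
# describe_snapshots item and returns None when the tag is absent:
def get_tag_value(resource, key):
    for tag in resource.get('Tags', []):
        if tag['Key'] == key:
            return tag['Value']
    return None


# Likewise, the hook's registration is not shown; a hedged sketch, assuming it runs
# before each single-user server starts (pre_spawn_hook receives the spawner):
c.KubeSpawner.pre_spawn_hook = volume_from_snapshot
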
def setup_homedir_sharding():
    # Wrapped in a function so these names don't leak into the global scope

    username = os.environ['SHARDER_DB_USERNAME']
    password = os.environ['SHARDER_DB_PASSWORD']
    dbname = os.environ['SHARDER_DB_NAME']

    deployment = z2jh.get_config('custom.deployment')
    nfs_server_template = '{deployment}-{name}'
    fileservers = [
        nfs_server_template.format(deployment=deployment, name=name)
        for name in yaml.safe_load(z2jh.get_config('custom.fileservers'))
    ]
    sharder = Sharder('localhost', username, password, dbname, 'homedir',
                      fileservers, log.app_log)

    allowed_external_hosts = z2jh.get_config('custom.allowed-external-hosts')

    class CustomSpawner(KubeSpawner):
        _sharder_thread_pool = ThreadPoolExecutor(max_workers=1)

        @concurrent.run_on_executor(executor='_sharder_thread_pool')
        def shard(self, username):
            if hasattr(self, '_fileserver_shard'):
                return self._fileserver_shard

            self._fileserver_shard = sharder.shard(username)
            return self._fileserver_shard

        @gen.coroutine
        def start(self):
            nfsserver = yield self.shard(self.user.name)
            self.volumes = [{
                'name': 'home',
                'hostPath': {
                    'path':
                    '/mnt/fileservers/{fileserver}/{username}'.format(
                        fileserver=nfsserver,
                        username=escapism.escape(self.user.name))
                }
            }]
            self.volume_mounts = [{
                'name': 'home',
                'mountPath': '/home/jovyan'
            }]

            if self.user.admin:
                self.volumes.append({
                    'name': 'fileservers',
                    'hostPath': {
                        'path': '/mnt/fileservers'
                    }
                })
                self.volume_mounts.append({
                    'name':
                    'fileservers',
                    'mountPath':
                    '/home/jovyan/fileservers'
                })

            self.singleuser_extra_pod_config = {
                'hostAliases': [{
                    'ip': socket.gethostbyname('egress-proxy'),
                    'hostnames': allowed_external_hosts
                }]
            }
            return (yield super().start())

    c.JupyterHub.spawner_class = CustomSpawner