Esempio n. 1
0
def validate_support_config(cluster_name):
    """
    Validates the provided non-encrypted helm chart values files for the support chart
    of a specific cluster.

    All non-secret values files listed under the cluster's support section are
    passed to a single `helm template` invocation. If helm exits with an
    error, its stdout is printed and the process exits with status 1. When the
    cluster defines no support section, a message is printed and nothing is
    validated.

    Args:
        cluster_name (str): The name of the cluster whose support config is
            validated
    """
    _prepare_helm_charts_dependencies_and_schemas()

    config_file_path = find_absolute_path_to_cluster_file(cluster_name)
    with open(config_file_path) as f:
        cluster = Cluster(yaml.load(f), config_file_path.parent)

    if not cluster.support:
        print_colour(f"No support defined for {cluster_name}. Nothing to validate!")
        return

    print_colour(
        f"Validating non-encrypted support values files for {cluster_name}..."
    )

    cmd = [
        "helm",
        "template",
        str(helm_charts_dir.joinpath("support")),
    ]

    for values_file in cluster.support["helm_chart_values_files"]:
        # Only non-encrypted files are validated; files with "secret" in their
        # name are skipped, the same filename rule validate_hub_config applies.
        if "secret" not in os.path.basename(values_file):
            cmd.append(f"--values={config_file_path.parent.joinpath(values_file)}")

    # Render once with the complete set of values files rather than once per
    # file: a partial set is not what gets deployed and may fail spuriously.
    try:
        subprocess.check_output(cmd, text=True)
    except subprocess.CalledProcessError as e:
        print(e.stdout)
        sys.exit(1)
Esempio n. 2
0
def use_cluster_credentials(cluster_name):
    """
    Open a login shell that is authenticated against the named cluster.

    The cluster config is validated and loaded, then a new shell is started
    inside the cluster's auth context so that it inherits the environment
    (including KUBECONFIG) set up there.

    This function is to be used with the `use-cluster-credentials` CLI
    command only - it is not used by the rest of the deployer codebase.
    """
    validate_cluster_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as fh:
        cluster_spec = yaml.load(fh)
    cluster = Cluster(cluster_spec, cluster_file.parent)

    # Cluster.auth() is a context manager, so it has to be entered with `with`
    # rather than called like a plain function.
    with cluster.auth():
        # The spawned shell inherits every env var (including KUBECONFIG);
        # once the user quits that shell, this python program resumes.
        # TODO: Figure out how to change the PS1 env var of the spawned shell
        # to change the prompt to f"cluster-{cluster.spec['name']}". This will
        # make it visually clear that the user is now operating in a different
        # shell.
        login_shell_cmd = [os.environ["SHELL"], "-l"]
        subprocess.check_call(login_shell_cmd)
Esempio n. 3
0
def validate_cluster_config(cluster_name):
    """
    Check a cluster's cluster.yaml against the deployer's JSONSchema.

    The schema is read from `shared/deployer/cluster.schema.yaml`, resolved
    relative to the current working directory. `jsonschema.validate` raises
    if the cluster config does not conform.
    """
    schema_path = Path(os.getcwd()) / "shared" / "deployer" / "cluster.schema.yaml"
    cluster_path = find_absolute_path_to_cluster_file(cluster_name)

    with open(cluster_path) as cluster_fh, open(schema_path) as schema_fh:
        cluster_config = yaml.load(cluster_fh)
        schema = yaml.load(schema_fh)

    # Raises a descriptive exception when validation fails
    jsonschema.validate(cluster_config, schema)
Esempio n. 4
0
def exec_homes_shell(cluster_name, hub_name):
    """
    Pop a shell with the home directories of the given hub mounted

    Homes will be mounted under /home

    Args:
        cluster_name (str): The name of the cluster the hub belongs to
        hub_name (str): The name of the hub whose home directories to mount

    Raises:
        ValueError: If the cluster defines no hub named `hub_name`
    """
    config_file_path = find_absolute_path_to_cluster_file(cluster_name)
    with open(config_file_path) as f:
        cluster = Cluster(yaml.load(f), config_file_path.parent)

    with cluster.auth():
        hub = next(
            (hub for hub in cluster.hubs if hub.spec["name"] == hub_name), None
        )
        # Fail with a clear message instead of an AttributeError on None when
        # the requested hub does not exist in this cluster.
        if hub is None:
            raise ValueError(
                f"No hub named {hub_name} found in cluster {cluster_name}"
            )
        hub.exec_homes_shell()
Esempio n. 5
0
def deploy(cluster_name, hub_name, config_path, dask_gateway_version):
    """
    Deploy one or more hubs in a given cluster

    The cluster config, hub config and authenticator setup are validated
    first. If `hub_name` is truthy only that hub is deployed, otherwise every
    hub of the cluster is deployed in turn.

    Args:
        cluster_name (str): The name of the cluster to deploy to
        hub_name (str): The name of a single hub to deploy, or a falsy value
            to deploy all hubs of the cluster
        config_path: Path to the encrypted deployer config file; it must
            provide the `auth0` and `secret_key` entries
        dask_gateway_version: Passed through unchanged to each hub's deploy

    Raises:
        ValueError: If `hub_name` is given but the cluster defines no hub with
            that name
    """
    validate_cluster_config(cluster_name)
    validate_hub_config(cluster_name, hub_name)
    assert_single_auth_method_enabled(cluster_name, hub_name)

    with get_decrypted_file(config_path) as decrypted_file_path:
        with open(decrypted_file_path) as f:
            config = yaml.load(f)

    # Most of our hubs use Auth0 for Authentication. This lets us programmatically
    # determine what auth provider each hub uses - GitHub, Google, etc. Without
    # this, we'd have to manually generate credentials for each hub - and we
    # don't want to do that. Auth0 domains are tied to a account, and
    # this is our auth0 domain for the paid account that 2i2c has.
    auth0 = config["auth0"]

    k = KeyProvider(auth0["domain"], auth0["client_id"],
                    auth0["client_secret"])

    # Each hub needs a unique proxy.secretToken. However, we don't want
    # to manually generate & save it. We also don't want it to change with
    # each deploy - that causes a pod restart with downtime. So instead,
    # we generate it based on a single secret key (`PROXY_SECRET_KEY`)
    # combined with the name of each hub. This way, we get unique,
    # cryptographically secure proxy.secretTokens without having to
    # keep much state. We can rotate them by changing `PROXY_SECRET_KEY`.
    # However, if `PROXY_SECRET_KEY` leaks, that means all the hub's
    # proxy.secretTokens have leaked. So let's be careful with that!
    SECRET_KEY = bytes.fromhex(config["secret_key"])

    config_file_path = find_absolute_path_to_cluster_file(cluster_name)
    with open(config_file_path) as f:
        cluster = Cluster(yaml.load(f), config_file_path.parent)

    with cluster.auth():
        hubs = cluster.hubs
        if hub_name:
            hub = next((hub for hub in hubs if hub.spec["name"] == hub_name),
                       None)
            # Fail with a clear message instead of an AttributeError on None
            # when the requested hub does not exist in this cluster.
            if hub is None:
                raise ValueError(
                    f"No hub named {hub_name} found in cluster {cluster_name}"
                )
            print_colour(f"Deploying hub {hub.spec['name']}...")
            hub.deploy(k, SECRET_KEY, dask_gateway_version)
        else:
            for i, hub in enumerate(hubs):
                print_colour(
                    f"{i+1} / {len(hubs)}: Deploying hub {hub.spec['name']}..."
                )
                hub.deploy(k, SECRET_KEY, dask_gateway_version)
0
def assert_single_auth_method_enabled(cluster_name, hub_name):
    """
    For each hub of a specific cluster, it asserts that only a single auth
    method is enabled. An error is raised when an authenticator
    other than Auth0 is enabled and `auth0` is not explicitly disabled.

    Only values files without "secret" in their file name are inspected. When
    several files set an authenticator class, the last one listed wins.

    Args:
        cluster_name (str): The name of the cluster whose hubs are checked
        hub_name (str): The name of a single hub to check, or a falsy value to
            check all hubs of the cluster

    Raises:
        ValueError: If a hub configures an authenticator class other than
            "auth0" while auth0 is not disabled in the hub's cluster config
    """
    _prepare_helm_charts_dependencies_and_schemas()

    config_file_path = find_absolute_path_to_cluster_file(cluster_name)
    with open(config_file_path) as f:
        cluster = Cluster(yaml.load(f), config_file_path.parent)

    if hub_name:
        hubs = [h for h in cluster.hubs if h.spec["name"] == hub_name]
    else:
        hubs = cluster.hubs

    for i, hub in enumerate(hubs):
        print_colour(
            f"{i+1} / {len(hubs)}: Validating authenticator config for {hub.spec['name']}..."
        )

        # auth0 is assumed unless a values file says otherwise
        authenticator_class = "auth0"
        for values_file_name in hub.spec["helm_chart_values_files"]:
            if "secret" in os.path.basename(values_file_name):
                continue

            values_file = config_file_path.parent.joinpath(values_file_name)
            # Load the hub extra config from its specific values files
            with open(values_file) as f:
                config = yaml.load(f)

            # Check if there's config that specifies an authenticator class
            try:
                if hub.spec["helm_chart"] != "basehub":
                    authenticator_class = config["basehub"]["jupyterhub"][
                        "hub"]["config"]["JupyterHub"]["authenticator_class"]
                else:
                    authenticator_class = config["jupyterhub"]["hub"][
                        "config"]["JupyterHub"]["authenticator_class"]
            except (KeyError, TypeError):
                # KeyError: this file does not set an authenticator class.
                # TypeError: the file is empty (loads as None) or one of the
                # intermediate keys is null. Either way, nothing to record.
                pass

        # If the authenticator class is other than auth0, then raise an error
        # if auth0 is not explicitly disabled from the cluster config. A
        # missing `auth0` section counts as "not explicitly disabled".
        auth0_enabled = hub.spec.get("auth0", {}).get("enabled", True)
        if authenticator_class != "auth0" and auth0_enabled:
            raise ValueError(
                f"Please disable auth0 for {hub.spec['name']} hub before using another authenticator class!"
            )
Esempio n. 7
0
def get_central_grafana_token(cluster_name):
    """Return the Grafana access token stored for the `cluster_name` cluster.

    The token is read from the `grafana_token` key of the encrypted
    `enc-grafana-token.secret.yaml` file in the cluster's config directory.
    This access token should have enough permissions to create datasources.
    """
    # The secret file sits next to the cluster file
    cluster_dir = find_absolute_path_to_cluster_file(cluster_name).parent
    token_file = cluster_dir.joinpath("enc-grafana-token.secret.yaml")

    # Decrypt the secret file and parse it while the decrypted copy exists
    with get_decrypted_file(token_file) as decrypted_path:
        with open(decrypted_path) as fh:
            secret_config = yaml.load(fh)

    return secret_config["grafana_token"]
Esempio n. 8
0
def deploy_support(cluster_name, cert_manager_version):
    """Deploy support components to a cluster

    The cluster and support configs are validated first. Nothing is deployed
    when the cluster defines no support section.

    Args:
        cluster_name (str): The name of the cluster to deploy support components to
        cert_manager_version (str): The version of cert-manager to deploy to the
            cluster, in the form vX.Y.Z. where X.Y.Z is valid SemVer.
    """
    validate_cluster_config(cluster_name)
    validate_support_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as fh:
        cluster = Cluster(yaml.load(fh), cluster_file.parent)

    # No support section means there is nothing to deploy
    if not cluster.support:
        return

    with cluster.auth():
        cluster.deploy_support(cert_manager_version=cert_manager_version)
Esempio n. 9
0
def get_central_grafana_url(central_cluster_name):
    """Return the Grafana host configured for the `central_cluster_name` cluster.

    The value is read from `grafana.ingress.tls` in the `support.values.yaml`
    file of the cluster's config directory: the first host of the first tls
    entry is returned.

    Args:
        central_cluster_name: name of the cluster running the central Grafana
    Raises:
        ValueError: if no tls config is present for Grafana
    """
    cluster_dir = find_absolute_path_to_cluster_file(central_cluster_name).parent
    support_values_path = cluster_dir.joinpath("support.values.yaml")

    with open(support_values_path) as fh:
        support_values = yaml.load(fh)

    # Walk grafana -> ingress -> tls, treating any missing level as empty
    grafana_section = support_values.get("grafana", {})
    ingress_section = grafana_section.get("ingress", {})
    tls_entries = ingress_section.get("tls", [])

    if not tls_entries:
        raise ValueError(
            f"No tls config was found for the Grafana instance of {central_cluster_name}. Please consider enable it before using it as the central Grafana."
        )

    # We only have one tls host right now. Modify this when things change.
    first_entry = tls_entries[0]
    return first_entry["hosts"][0]
Esempio n. 10
0
def validate_hub_config(cluster_name, hub_name):
    """
    Render each hub's helm chart with its non-encrypted values files to
    validate them.

    If `hub_name` is truthy only hubs with that name are validated, otherwise
    all hubs of the cluster are. On a helm failure the command's stdout is
    printed and the process exits with status 1.
    """
    _prepare_helm_charts_dependencies_and_schemas()

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as fh:
        cluster = Cluster(yaml.load(fh), cluster_file.parent)

    selected_hubs = (
        [h for h in cluster.hubs if h.spec["name"] == hub_name]
        if hub_name
        else cluster.hubs
    )
    total = len(selected_hubs)

    for position, hub in enumerate(selected_hubs, start=1):
        print_colour(
            f"{position} / {total}: Validating non-encrypted hub values files for {hub.spec['name']}..."
        )

        chart_name = hub.spec["helm_chart"]
        # Values files with "secret" in their name are left out
        values_args = [
            f"--values={cluster_file.parent.joinpath(values_file)}"
            for values_file in hub.spec["helm_chart_values_files"]
            if "secret" not in os.path.basename(values_file)
        ]
        helm_cmd = [
            "helm",
            "template",
            str(helm_charts_dir.joinpath(chart_name)),
            *values_args,
        ]

        # Workaround the current requirement for dask-gateway 0.9.0 to have a
        # JupyterHub api-token specified, for updates if this workaround can be
        # removed, see https://github.com/dask/dask-gateway/issues/473.
        if chart_name in ("daskhub", "binderhub"):
            helm_cmd.append(
                "--set=dask-gateway.gateway.auth.jupyterhub.apiToken=dummy")

        try:
            subprocess.check_output(helm_cmd, text=True)
        except subprocess.CalledProcessError as err:
            print(err.stdout)
            sys.exit(1)
Esempio n. 11
0
def get_cluster_prometheus_creds(cluster_name):
    """Read the prometheus credentials configured for the `cluster_name` cluster.

    The credentials are taken from the `prometheusIngressAuthSecret` entry of
    the encrypted `enc-support.secret.values.yaml` file in the cluster's
    config directory.

    Args:
        cluster_name: name of the cluster
    Returns:
        The value stored under `prometheusIngressAuthSecret`, or an empty dict
        when that key is absent
    """
    cluster_dir = find_absolute_path_to_cluster_file(cluster_name).parent
    secret_values_path = cluster_dir.joinpath("enc-support.secret.values.yaml")

    with get_decrypted_file(secret_values_path) as decrypted_path:
        with open(decrypted_path) as fh:
            secret_values = yaml.load(fh)

    credentials = secret_values.get("prometheusIngressAuthSecret", {})
    return credentials
Esempio n. 12
0
def get_cluster_prometheus_address(cluster_name):
    """Look up the prometheus host configured for the `cluster_name` cluster.

    The host is read from the `support.values.yaml` file of the cluster's
    config directory.

    Args:
        cluster_name: name of the cluster
    Returns:
        The first host of the first entry under
        `prometheus.server.ingress.tls`
    Raises ValueError if
        - `prometheusIngressAuthSecret` isn't enabled
        - `prometheus.server.ingress.tls` is missing or empty
    """
    cluster_dir = find_absolute_path_to_cluster_file(cluster_name).parent
    support_values_path = cluster_dir.joinpath("support.values.yaml")

    with open(support_values_path) as fh:
        support_values = yaml.load(fh)

    # Refuse to hand out the address unless the ingress auth secret is enabled,
    # i.e. the prometheus instance is securely exposed to the outside.
    auth_secret = support_values.get("prometheusIngressAuthSecret", {})
    if not auth_secret.get("enabled", False):
        raise ValueError(
            f"`prometheusIngressAuthSecret` wasn't configured for {cluster_name}"
        )

    # Walk prometheus -> server -> ingress -> tls, missing levels count as empty
    server_section = support_values.get("prometheus", {}).get("server", {})
    tls_entries = server_section.get("ingress", {}).get("tls", [])

    if not tls_entries:
        raise ValueError(
            f"No tls config was found for the prometheus instance of {cluster_name}"
        )

    # We only have one tls host right now. Modify this when things change.
    first_entry = tls_entries[0]
    return first_entry["hosts"][0]
Esempio n. 13
0
def deploy_grafana_dashboards(cluster_name):
    """
    Deploy the jupyterhub/grafana-dashboards dashboards to a cluster's Grafana.

    The dashboards repository is cloned into a `grafana_dashboards` directory
    under the current working directory, its `deploy.py` is run against the
    cluster's Grafana URL with `GRAFANA_TOKEN` set in the environment, and the
    clone is removed afterwards. Deployment is skipped when the cluster has no
    support section or no Grafana ingress hosts are configured.

    Grafana dashboards and deployment mechanism in question are maintained in
    this repo: https://github.com/jupyterhub/grafana-dashboards

    Raises:
        ValueError: if the secret token file has no `grafana_token` entry
    """
    validate_cluster_config(cluster_name)
    validate_support_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as fh:
        cluster = Cluster(yaml.load(fh), cluster_file.parent)

    cluster_dir = cluster_file.parent

    # Without a support chart there is no Grafana to deploy to
    if not cluster.support:
        print_colour(
            "Support chart has not been deployed. Skipping Grafana dashboards deployment..."
        )
        return

    grafana_token_file = cluster_dir.joinpath("enc-grafana-token.secret.yaml")

    # Read the cluster specific secret grafana token file
    with get_decrypted_file(grafana_token_file) as decrypted_path:
        with open(decrypted_path) as fh:
            secret_config = yaml.load(fh)

    # The token must be present in the secret config before going any further
    if "grafana_token" not in secret_config.keys():
        raise ValueError(
            f"`grafana_token` not provided in secret file! Please add it and try again: {grafana_token_file}"
        )

    # FIXME: We assume grafana_url and uses_tls config will be defined in the first
    #        file listed under support.helm_chart_values_files.
    first_values_file = cluster.support.get("helm_chart_values_files", [])[0]
    with open(cluster_dir.joinpath(first_values_file)) as fh:
        support_values = yaml.load(fh)

    # Both the hosts and the tls settings live under grafana.ingress
    ingress_section = support_values.get("grafana", {}).get("ingress", {})
    grafana_hosts = ingress_section.get("hosts", {})
    uses_tls = ingress_section.get("tls", {})

    if not grafana_hosts:
        print_colour(
            "Couldn't find `config.grafana.ingress.hosts`. Skipping Grafana dashboards deployment..."
        )
        return

    scheme = "https" if uses_tls else "http"
    grafana_url = f"{scheme}://{grafana_hosts[0]}"

    # Use the jupyterhub/grafana-dashboards deployer to deploy the dashboards to this cluster's grafana
    print_colour("Cloning jupyterhub/grafana-dashboards...")

    clone_dir = "grafana_dashboards"
    clone_cmd = [
        "git",
        "clone",
        "https://github.com/jupyterhub/grafana-dashboards",
        clone_dir,
    ]
    subprocess.check_call(clone_cmd)

    # The existing env is kept too so the deployer can find jsonnet and grafonnet
    deploy_env = {**os.environ, "GRAFANA_TOKEN": secret_config["grafana_token"]}

    try:
        print_colour(f"Deploying grafana dashboards to {cluster_name}...")
        subprocess.check_call(
            ["./deploy.py", grafana_url],
            env=deploy_env,
            cwd=clone_dir,
        )

        print_colour(f"Done! Dashboards deployed to {grafana_url}.")
    finally:
        # Delete the directory where we cloned the repo.
        # The deployer cannot call jsonnet to deploy the dashboards if using a temp directory here.
        # Might be because opening more than once of a temp file is tried
        # (https://docs.python.org/3.8/library/tempfile.html#tempfile.NamedTemporaryFile)
        shutil.rmtree(clone_dir)
Esempio n. 14
0
def run_hub_health_check(cluster_name, hub_name, check_dask_scaling=False):
    """Run the pytest-based health check against one hub of a cluster.

    The hub's URL is built from its configured domain (or from a domain
    override file when the hub declares one), and the API token of the
    hub-health service is read from the `hub` k8s Secret in the hub's
    namespace. Both are passed to the tests under `deployer/tests`.

    Args:
        cluster_name (str): The name of the cluster where the hub is deployed
        hub_name (str): The name of the hub to run a health check for
        check_dask_scaling (bool, optional): If true, run an additional check that dask
            workers can scale. Only applies to daskhubs. Defaults to False.

    Returns
        exit_code (int): The exit code of the pytest process. Only returned
            when it is 0; on any other value the process exits with that code.
            The process also exits with status 1 when the hub name matches no
            hub or more than one hub.
    """
    # Read in the cluster.yaml file
    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as fh:
        cluster = Cluster(yaml.load(fh), cluster_file.parent)

    # Find the hub's config; exactly one hub must carry the requested name
    matching_hubs = [h for h in cluster.hubs if h.spec["name"] == hub_name]
    if not matching_hubs:
        print_colour("ERROR: No hubs with this name found!")
        sys.exit(1)
    if len(matching_hubs) > 1:
        print_colour("ERROR: More than one hub with this name found!")
        sys.exit(1)
    hub = matching_hubs[0]

    print_colour(f"Running hub health check for {hub.spec['name']}...")

    # Check if this hub has a domain override file. If yes, apply override.
    if "domain_override_file" in hub.spec:
        override_path = hub.cluster.config_path.joinpath(
            hub.spec["domain_override_file"]
        )
        with get_decrypted_file(override_path) as decrypted_path:
            with open(decrypted_path) as fh:
                override_config = yaml.load(fh)

        hub.spec["domain"] = override_config["domain"]

    # Retrieve hub's URL
    hub_url = f'https://{hub.spec["domain"]}'

    # Read in the service api token from a k8s Secret in the k8s cluster
    kubectl_cmd = [
        "kubectl",
        "get",
        "secrets",
        "hub",
        f"--namespace={hub.spec['name']}",
        r"--output=jsonpath={.data['hub\.services\.hub-health\.apiToken']}",
    ]
    with cluster.auth():
        try:
            encoded_token = subprocess.check_output(kubectl_cmd, text=True)
        except subprocess.CalledProcessError as e:
            raise ValueError(
                f"Failed to acquire a JupyterHub API token for the hub-health service: {e.stdout}"
            )
        service_api_token = base64.b64decode(encoded_token).decode()

    # On failure, pytest prints out params to the test that failed.
    # This can contain sensitive info - so we hide stderr
    # FIXME: Don't use pytest - just call a function instead
    #
    # Show errors locally but redirect on CI
    running_on_ci = os.environ.get("CI", "false") == "true"

    chart_name = hub.spec["helm_chart"]
    pytest_args = [
        "-q",
        "deployer/tests",
        f"--hub-url={hub_url}",
        f"--api-token={service_api_token}",
        f"--hub-type={chart_name}",
    ]
    if check_dask_scaling and chart_name == "daskhub":
        pytest_args.append("--check-dask-scaling")

    if running_on_ci:
        print_colour("Testing on CI, not printing output")
        with open(os.devnull, "w") as sink, redirect_stderr(sink), redirect_stdout(
            sink
        ):
            exit_code = pytest.main(pytest_args)
    else:
        print_colour("Testing locally, do not redirect output")
        exit_code = pytest.main(pytest_args)

    if exit_code != 0:
        print("Health check failed!", file=sys.stderr)
        sys.exit(exit_code)

    print_colour("Health check succeeded!")
    return exit_code
Esempio n. 15
0
    def _build_config_filename(self, cluster_name, hub_name):
        """Return the path of a hub's encrypted secret values file.

        The file is named `enc-<hub_name>.secret.values.yaml` and is located
        in the directory that holds the cluster file of `cluster_name`.
        """
        cluster_dir = find_absolute_path_to_cluster_file(cluster_name).parent
        secret_values_name = f"enc-{hub_name}.secret.values.yaml"
        return cluster_dir.joinpath(secret_values_name)