Example #1
def deploy_configuration(config):
    logger.info(f'All qhub endpoints will be under *.{config["domain"]}')

    jupyterhub_endpoint = f'jupyter.{config["domain"]}'
    if ("client_id" not in config["authentication"]["config"]
            or "client_secret" not in config["authentication"]["config"]):
        logger.info(
            "client_id and client_secret were not specified - dynamically creating oauth client"
        )
        with timer(logger, "creating oauth client"):
            config["authentication"]["config"] = auth0.create_client(
                jupyterhub_endpoint)

    with timer(logger, "rendering template"):
        tmp_config = pathlib.Path("./config.yaml")
        with tmp_config.open("w") as f:
            yaml.dump(config, f)

        render_default_template(".", tmp_config)

    infrastructure_dir = pathlib.Path(
        config["project_name"]) / "infrastructure"

    terraform.init(str(infrastructure_dir))

    # ========= bootstrap infrastructure =========
    terraform.apply(
        str(infrastructure_dir),
        targets=[
            "module.kubernetes",
            "module.kubernetes-initialization",
            "module.kubernetes-ingress",
        ],
    )

    # ============= update dns ================
    output = terraform.output(str(infrastructure_dir))
    for key in output:
        if key.startswith("ingress"):
            endpoint = f'{key.split("_")[1]}.{config["domain"]}'
            address = output[key]["value"]
            if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", address):
                cloudflare.update_record("qhub.dev", endpoint, "A", address)
            else:
                cloudflare.update_record("qhub.dev", endpoint, "CNAME",
                                         address)

    # ======= apply entire infrastructure ========
    terraform.apply(str(infrastructure_dir))
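All of these snippets time their work with a `timer` context manager that is never defined on this page. A minimal sketch of what it might look like, assuming it does nothing more than log the elapsed wall-clock time (the behavior is inferred from how the examples call it, not taken from the qhub source):

import contextlib
import time


@contextlib.contextmanager
def timer(logger, prefix):
    # Log how long the wrapped block takes, even if it raises.
    start = time.time()
    try:
        yield
    finally:
        logger.info(f"{prefix} took {time.time() - start:.1f} s")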
Example #2
def output(directory=None):
    terraform_path = download_terraform_binary()

    logger.info(f"terraform={terraform_path} output directory={directory}")
    with timer(logger, "terraform output"):
        # check_output returns the JSON plus a trailing newline; strip it
        return subprocess.check_output([terraform_path, "output", "-json"],
                                       cwd=directory).decode("utf8")[:-1]
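Note that this variant returns the raw JSON text itself rather than a parsed object (contrast Example #5), so a caller would typically feed it through json.loads, e.g.:

import json

state = json.loads(output("infrastructure"))
ingress_keys = [k for k in state if k.startswith("ingress")]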
Example #3
def apply(directory=None, targets=None):
    targets = targets or []

    logger.info(f"terraform apply directory={directory} targets={targets}")
    command = ["apply", "-auto-approve"] + ["-target=" + _ for _ in targets]
    with timer(logger, "terraform apply"):
        run_terraform_subprocess(command, cwd=directory, prefix="terraform")
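Several examples delegate to a `run_terraform_subprocess` helper that is also not shown. A plausible sketch, assuming it invokes the terraform binary and echoes each line of output under a prefix (the `strip_errors` and `timeout` keywords seen in Example #7 are omitted here):

import subprocess


def run_terraform_subprocess(processargs, cwd=None, prefix="terraform", **kwargs):
    # Run `terraform <args>`, streaming prefixed output, and fail loudly
    # on a non-zero exit code.
    process = subprocess.Popen(["terraform"] + processargs,
                               cwd=cwd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    for line in process.stdout:
        print(f"[{prefix}]: {line.decode('utf8')}", end="")
    if process.wait() != 0:
        raise subprocess.CalledProcessError(process.returncode, "terraform")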
Example #4
def deploy_configuration(config, dns_provider, dns_auto_provision,
                         disable_prompt):
    logger.info(f'All qhub endpoints will be under *.{config["domain"]}')

    with timer(logger, "deploying QHub"):
        guided_install(config, dns_provider, dns_auto_provision,
                       disable_prompt)
Example #5
def output(directory=None):
    logger.info(f"terraform output directory={directory}")
    with timer(logger, "terraform output"):
        output = subprocess.check_output("terraform output -json",
                                         shell=True,
                                         cwd=directory).decode("utf8")
        return json.loads(output)
Example #6
def refresh(directory=None):
    logger.info(f"terraform refresh directory={directory}")
    command = [
        "refresh",
    ]

    with timer(logger, "terraform refresh"):
        run_terraform_subprocess(command, cwd=directory, prefix="terraform")
Example #7
def tfimport(addr, id, directory=None):
    logger.info(f"terraform import directory={directory} addr={addr} id={id}")
    command = ["import", addr, id]
    with timer(logger, "terraform import"):
        run_terraform_subprocess(command,
                                 cwd=directory,
                                 prefix="terraform",
                                 strip_errors=True,
                                 timeout=30)
Example #8
def apply(directory=None, targets=None):
    targets = targets or []

    logger.info(f"terraform apply directory={directory} targets={targets}")
    with timer(logger, "terraform apply"):
        command = " ".join(["terraform", "apply", "-auto-approve"] +
                           ["-target=" + _ for _ in targets])

        subprocess.check_output(command, shell=True, cwd=directory)
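Note the difference from Example #3: here the arguments are joined into a single string and run with shell=True. That works because the module targets contain no shell metacharacters, but the list form used in Example #3 avoids quoting pitfalls entirely.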
Example #9
def deploy_configuration(
    config,
    dns_provider,
    dns_auto_provision,
    disable_prompt,
    skip_remote_state_provision,
    full_only,
):
    logger.info(f'All qhub endpoints will be under https://{config["domain"]}')

    with timer(logger, "deploying QHub"):
        try:
            guided_install(
                config,
                dns_provider,
                dns_auto_provision,
                disable_prompt,
                skip_remote_state_provision,
                full_only,
            )
        except CalledProcessError as e:
            logger.error(e.output)
            raise e
Example #10
def destroy_configuration(config):
    logger.info(
        """Removing all infrastructure, your local files will still remain, \n
    you can use 'qhub deploy' to re - install infrastructure using same config file"""
    )

    with timer(logger, "destroying QHub"):
        # 01 Verify configuration file exists
        verify_configuration_file_exists()

        # 02 Check terraform
        check_terraform()

        # 03 Check Environment Variables
        check_cloud_credentials(config)

        # 04 Remove all infrastructure
        with change_directory("infrastructure"):
            run(["terraform", "destroy", "-auto-approve"])

        # 05 Remove terraform backend remote state bucket
        with change_directory("terraform-state"):
            run(["terraform", "destroy", "-auto-approve"])
Example #11
def destroy_configuration(config,
                          skip_remote_state_provision=False,
                          full_only=False):
    logger.info(
        """Removing all infrastructure, your local files will still remain,
    you can use 'qhub deploy' to re-install infrastructure using same config file\n"""
    )

    with timer(logger, "destroying QHub"):
        # 01 Check Environment Variables
        check_cloud_credentials(config)

        # 02 Remove all infrastructure
        terraform.init(directory="infrastructure")
        terraform.refresh(directory="infrastructure")

        if not full_only:
            stages = (
                {
                    "name": "General cluster software",
                    "targets": [
                        "module.kubernetes-nfs-mount",
                        "module.kubernetes-nfs-server",
                        "module.kubernetes-conda-store-server",
                        "module.kubernetes-conda-store-mount",
                        "module.kubernetes-autoscaling",
                        "module.qhub",
                        "module.prefect",
                        "module.monitoring",
                        "module.clearml",
                        "module.forwardauth",
                        "random_password.jupyterhub-jhsecret",
                        "random_password.forwardauth-jhsecret",
                        "kubernetes_secret.qhub_yaml_secret",
                    ] + [
                        f"module.{helmext['name']}-extension"
                        for helmext in config.get("helm_extensions", [])
                    ] + [
                        f"module.ext-{ext['name']}"
                        for ext in config.get("extensions", [])
                    ],
                },
                {
                    "name": "Keycloak Config",
                    "targets": [
                        "module.kubernetes-keycloak-config",
                        "random_password.keycloak-qhub-bot-password",
                    ],
                },
                {
                    "name": "Keycloak Helm installation",
                    "targets": ["module.kubernetes-keycloak-helm"],
                },
                {
                    "name": "Kubernetes Ingress",
                    "targets": ["module.kubernetes-ingress"],
                },
                {
                    "name": "Kubernetes Cluster",
                    "targets": [
                        "module.kubernetes",
                        "module.kubernetes-initialization",
                    ],
                },
                {
                    "name": "Cloud Infrastructure",
                    "targets": [
                        "module.registry-jupyterhub",  # GCP
                        "module.efs",  # AWS
                        "module.registry-jupyterlab",  # AWS
                        "module.network",  # AWS
                        "module.accounting",  # AWS
                        "module.registry",  # Azure
                    ],
                },
            )

            for stageinfo in stages:
                logger.info(
                    f"Running Terraform Stage: {stageinfo['name']} {stageinfo['targets']}"
                )
                terraform.destroy(directory="infrastructure",
                                  targets=stageinfo["targets"])

        else:
            logger.info("Running Terraform Stage: FULL")
            terraform.destroy(directory="infrastructure")

        # 03 Remove terraform backend remote state bucket
        # backwards compatible with `qhub-config.yaml` files that
        # don't have a `terraform_state` key
        if (not skip_remote_state_provision
                and config.get("terraform_state", {}).get("type", "") == "remote"
                and config.get("provider") != "local"):
            terraform_state_sync(config)
            terraform.destroy(directory="terraform-state")
Example #12
def force_destroy_configuration(config):
    logging.info(
        """FORCE Removing all infrastructure (not using terraform).""")

    with timer(logging, "destroying QHub"):
        # 01 Check we have cloud details we need
        check_cloud_credentials(config)

        if config.get("provider", "") != "aws":
            raise ValueError("force-destroy currently only available for AWS")

        project_name = config.get("project_name", "").strip()

        if project_name == "":
            raise ValueError("project_name cannot be blank")

        if "amazon_web_services" not in config:
            raise ValueError(
                "amazon_web_services section must exist in qhub-config.yaml")

        region = config["amazon_web_services"].get("region", "").strip()

        if region == "":
            raise ValueError(
                "amazon_web_services.region must exist in qhub-config.yaml")

        logging.info(f"Remove AWS project {project_name} in region {region}")

        env = config.get("namespace", "dev").strip()

        # 02 Remove all infrastructure
        try:
            import boto3
        except ImportError:
            raise ValueError(
                "Please ensure boto3 package is installed using: pip install boto3==1.17.98"
            )

        restag = boto3.client("resourcegroupstaggingapi", region_name=region)

        filter_params = dict(
            TagFilters=[
                {
                    "Key": "Owner",
                    "Values": [
                        "terraform",
                        "terraform-state",
                    ],
                },
                {
                    "Key": "Environment",
                    "Values": [
                        env,
                    ],
                },
                {
                    "Key": "Project",
                    "Values": [
                        project_name,
                    ],
                },
            ],
            ResourcesPerPage=50,
        )

        resources = []

        response = restag.get_resources(**filter_params)

        resources.extend(response["ResourceTagMappingList"])

        while "PaginationToken" in response and response["PaginationToken"]:
            token = response["PaginationToken"]
            response = restag.get_resources(**filter_params,
                                            PaginationToken=token)
            resources.extend(response["ResourceTagMappingList"])

        # Load Balancer and other K8s-generated resources will need to be queried separately:

        filter_params = dict(
            TagFilters=[{
                "Key": f"kubernetes.io/cluster/{project_name}-{env}",
                "Values": [
                    "owned",
                ],
            }],
            ResourcesPerPage=50,
        )

        response = restag.get_resources(**filter_params)
        resources.extend(response["ResourceTagMappingList"])

        # IAM

        iam = boto3.resource("iam")
        for suffix in ("eks-cluster-role", "eks-node-group-role"):

            try:
                role = iam.Role(f"{project_name}-{env}-{suffix}")

                if role.tags is not None:

                    tags_dict = {
                        t["Key"]: t.get("Value", "")
                        for t in role.tags
                    }

                    if (tags_dict.get("Owner", "") == "terraform"
                            and tags_dict.get("Environment", "") == env
                            and tags_dict.get("Project", "") == project_name):
                        resources.append({"ResourceARN": role.arn})

            except iam.meta.client.exceptions.NoSuchEntityException:
                pass

        # Summarize resources

        type_groups = {}
        for r in resources:
            de_arned = parse_arn(r["ResourceARN"])
            t = f"{de_arned['service']}-{de_arned['resource_type']}"
            type_groups.setdefault(t, []).append(de_arned)
            logging.info(r["ResourceARN"])

        logging.info([(k, len(v)) for k, v in type_groups.items()])

        # Order
        priority_types = (
            "eks-nodegroup",
            "eks-cluster",
            "elasticloadbalancing-loadbalancer",
            "ec2-internet-gateway",
            "ec2-route-table",
            "elasticfilesystem-file-system",
            "ec2-subnet",
            "ec2-security-group",
            "ec2-vpc",
            "ecr-repository",
            "dynamodb-table",
            "s3-None",
            "resource-groups-group",
            "iam-role",
        )

        for pt in priority_types:
            logging.info(f"Inspect {pt}")
            for r in type_groups.get(pt, []):
                if pt == "eks-nodegroup":
                    nodegroup_resource = r["resource"].split("/")

                    cluster_name = nodegroup_resource[0]
                    nodegroup_name = nodegroup_resource[1]

                    logging.info(
                        f"Delete {nodegroup_name} on cluster {cluster_name}")

                    client = boto3.client("eks", region_name=region)
                    client.delete_nodegroup(clusterName=cluster_name,
                                            nodegroupName=nodegroup_name)

                elif pt == "eks-cluster":
                    logging.info(f"Delete EKS cluster {r['resource']}")

                    client = boto3.client("eks", region_name=region)

                    response = client.list_nodegroups(
                        clusterName=r["resource"])
                    while len(response["nodegroups"]) > 0:
                        logging.info("Nodegroups still present, sleep 10")
                        time.sleep(10)
                        response = client.list_nodegroups(
                            clusterName=r["resource"])

                    client.delete_cluster(name=r["resource"])

                elif pt == "elasticloadbalancing-loadbalancer":
                    client = boto3.client("elb", region_name=region)

                    logging.info(f"Inspect Load balancer {r['resource']}")

                    logging.info(f"Delete Load balancer {r['resource']}")
                    response = client.delete_load_balancer(
                        LoadBalancerName=r["resource"])

                elif pt == "ec2-route-table":
                    logging.info(f"Inspect route table {r['resource']}")
                    ec2 = boto3.resource("ec2", region_name=region)
                    route_table = ec2.RouteTable(r["resource"])

                    for assoc in route_table.associations:
                        logging.info(f"Delete route table assoc {assoc.id}")
                        assoc.delete()

                    time.sleep(10)

                    logging.info(f"Delete route table {r['resource']}")
                    route_table.delete()

                elif pt == "ec2-subnet":
                    logging.info(f"Inspect subnet {r['resource']}")
                    ec2 = boto3.resource("ec2", region_name=region)
                    subnet = ec2.Subnet(r["resource"])

                    for ni in subnet.network_interfaces.all():
                        ni.load()
                        # But can only detach if attached...
                        if ni.attachment:
                            ni.detach(DryRun=False, Force=True)
                            ni.delete()

                    logging.info(f"Delete subnet {r['resource']}")
                    subnet.delete(DryRun=False)

                elif pt == "ec2-security-group":
                    logging.info(f"Inspect security group {r['resource']}")
                    ec2 = boto3.resource("ec2", region_name=region)
                    security_group = ec2.SecurityGroup(r["resource"])

                    for ipperms in security_group.ip_permissions_egress:
                        security_group.revoke_egress(DryRun=False,
                                                     IpPermissions=[ipperms])

                    for ipperms in security_group.ip_permissions:
                        security_group.revoke_ingress(DryRun=False,
                                                      IpPermissions=[ipperms])

                    logging.info(f"Delete security group {r['resource']}")
                    security_group.delete(DryRun=False)

                elif pt == "ec2-internet-gateway":
                    logging.info(f"Inspect internet gateway {r['resource']}")

                    ec2 = boto3.resource("ec2", region_name=region)
                    internet_gateway = ec2.InternetGateway(r["resource"])

                    for attach in internet_gateway.attachments:
                        logging.info(
                            f"Inspect IG attachment {attach['VpcId']}")
                        if attach.get("State", "") == "available":
                            logging.info(f"Detach from VPC {attach['VpcId']}")
                            internet_gateway.detach_from_vpc(
                                VpcId=attach["VpcId"])

                    time.sleep(10)

                    logging.info(f"Delete internet gateway {r['resource']}")
                    internet_gateway.delete(DryRun=False)

                elif pt == "elasticfilesystem-file-system":
                    client = boto3.client("efs", region_name=region)

                    logging.info(f"Delete efs {r['resource']}")

                    mts = client.describe_mount_targets(
                        FileSystemId=r["resource"])

                    for mt in mts["MountTargets"]:
                        client.delete_mount_target(
                            MountTargetId=mt["MountTargetId"])

                    response = client.delete_file_system(
                        FileSystemId=r["resource"])

                    # Ideally we would wait here until describe_file_systems
                    # raises botocore.errorfactory.FileSystemNotFound:
                    # response = client.describe_file_systems(
                    #    FileSystemId=r['resource']
                    # )

                elif pt == "ec2-vpc":
                    logging.info(f"Inspect VPC {r['resource']}")

                    ec2 = boto3.resource("ec2", region_name=region)

                    vpc = ec2.Vpc(r["resource"])

                    # for cidr_assoc in vpc.cidr_block_association_set:
                    #    logging.info(cidr_assoc)
                    #    r = vpc.disassociate_subnet_cidr_block(
                    #        AssociationId=cidr_assoc['AssociationId']
                    #    )
                    #    logging.info(r)

                    logging.info(f"Delete VPC {r['resource']}")
                    vpc.delete()

                elif pt == "ecr-repository":
                    logging.info(f"Inspect ECR {r['resource']}")
                    client = boto3.client("ecr", region_name=region)

                    logging.info(
                        f"Delete ecr {r['account']} / {r['resource']}")

                    response = client.delete_repository(
                        registryId=r["account"],
                        repositoryName=r["resource"],
                        force=True,
                    )

                elif pt == "s3-None":
                    logging.info(f"Inspect S3 {r['resource']}")
                    s3 = boto3.resource("s3", region_name=region)

                    logging.info(f"Delete s3 {r['resource']}")

                    bucket = s3.Bucket(r["resource"])

                    r = bucket.objects.all().delete()

                    r = bucket.object_versions.delete()

                    response = bucket.delete()

                elif pt == "dynamodb-table":
                    logging.info(f"Inspect DynamoDB {r['resource']}")

                    client = boto3.client("dynamodb", region_name=region)

                    logging.info(f"Delete DynamoDB {r['resource']}")

                    response = client.delete_table(TableName=r["resource"])

                elif pt == "resource-groups-group":
                    logging.info(f"Inspect Resource Group {r['resource']}")

                    client = boto3.client("resource-groups",
                                          region_name=region)

                    logging.info(f"Delete Resource Group {r['resource']}")

                    response = client.delete_group(Group=r["arn"])

                elif pt == "iam-role":
                    logging.info(f"Inspect IAM Role {r['resource']}")
                    iam = boto3.resource("iam")
                    role = iam.Role(r["resource"])

                    for policy in role.attached_policies.all():
                        logging.info(f"Detach Role policy {policy.arn}")
                        response = role.detach_policy(PolicyArn=policy.arn)

                    logging.info(f"Delete IAM Role {r['resource']}")
                    role.delete()
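Example #12 leans on a `parse_arn` helper that is not shown. Judging from the keys its result is indexed with (`service`, `resource_type`, `resource`, `account`, `arn`) and the standard `arn:partition:service:region:account-id:resource-type/resource` layout, a hypothetical implementation could be the following. Note that S3 bucket ARNs carry no resource-type segment, which is why the code above groups them under `s3-None`:

def parse_arn(arn):
    # arn:partition:service:region:account-id[:resource-type][/:]resource
    parts = arn.split(":", 5)
    result = {
        "arn": arn,
        "partition": parts[1],
        "service": parts[2],
        "region": parts[3],
        "account": parts[4],
        "resource_type": None,
        "resource": parts[5],
    }
    # The resource segment may be "type/name", "type:name", or a bare name.
    for sep in ("/", ":"):
        if sep in result["resource"]:
            result["resource_type"], result["resource"] = result["resource"].split(sep, 1)
            break
    return result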
Example #13
def init(directory=None):
    logger.info(f"terraform init directory={directory}")
    with timer(logger, "terraform init"):
        run_terraform_subprocess(["init"], cwd=directory, prefix="terraform")
Example #14
def init(directory=None):
    logger.info(f"terraform init directory={directory}")
    with timer(logger, "terraform init"):
        subprocess.check_output("terraform init", shell=True, cwd=directory)
Example #15
def destroy(directory=None):
    logger.info(f"terraform destroy directory={directory}")

    with timer(logger, "terraform destroy"):
        command = "terraform destroy -auto-approve"
        subprocess.check_output(command, shell=True, cwd=directory)