Example #1
0
def create_cluster() -> None:
    """Create the private GKE cluster, then configure kubectl to use it.

    The flags passed to ``gcloud container clusters create`` are grouped
    below by the concern they address; the final argument order is the
    concatenation of those groups.
    """
    cluster_name = config.gke_cluster_name
    zone_flag = f"--zone={config.zone}"

    # Set nodes to automatically repair and upgrade
    # https://cloud.google.com/kubernetes-engine/docs/how-to/node-auto-repair
    # https://cloud.google.com/kubernetes-engine/docs/how-to/node-auto-upgrades
    upkeep_flags = [
        "--release-channel=stable",
        "--enable-autorepair",
        "--enable-autoupgrade",
        "--maintenance-window=7:00",
    ]

    identity_flags = [
        f"--service-account={config.gke_service_account_full_name}",
    ]

    # Create a private cluster
    # https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters
    # https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata
    #
    # Restrict access to K8S master to IP addresses listed in MASTER_AUTHORIZED_NETWORKS
    # https://cloud.google.com/kubernetes-engine/docs/how-to/authorized-networks
    network_flags = [
        f"--network={config.network_name}",
        f"--subnetwork={config.network_name}-gke",
        "--cluster-secondary-range-name=gke-pods",
        "--services-secondary-range-name=gke-services",
        "--enable-ip-alias",
        "--enable-master-authorized-networks",
        "--enable-private-nodes",
        f"--master-authorized-networks={config.authorized_networks}",
        "--master-ipv4-cidr=172.16.0.0/28",
    ]

    # Enable Stackdriver Kubernetes monitoring and logging
    # https://cloud.google.com/monitoring/kubernetes-engine/
    monitoring_flags = ["--enable-stackdriver-kubernetes"]

    # Use shielded nodes
    # https://cloud.google.com/kubernetes-engine/docs/how-to/shielded-gke-nodes
    #
    # Disable legacy metadata API
    #
    # Disable authentication with static password and client certificate
    # https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster#restrict_authn_methods
    hardening_flags = [
        "--enable-shielded-nodes",
        "--shielded-secure-boot",
        "--metadata=disable-legacy-endpoints=true",
        "--no-enable-basic-auth",
        "--no-enable-legacy-authorization",
        "--no-issue-client-certificate",
    ]

    sizing_flags = [
        "--num-nodes=1",
        "--machine-type=n1-standard-4",
    ]

    create_command = (
        ["container", "clusters", "create", cluster_name, zone_flag]
        + upkeep_flags
        + identity_flags
        + network_flags
        + monitoring_flags
        + hardening_flags
        + sizing_flags
    )
    gcloud(create_command)

    # Configure kubectl
    credentials_command = [
        "container",
        "clusters",
        "get-credentials",
        config.gke_cluster_name,
        f"--zone={config.zone}",
    ]
    gcloud(credentials_command)
Example #2
0
def create_cluster_service_account() -> None:
    """Create a least privilege service account for the cluster nodes.

    If ``gcloud iam service-accounts describe`` succeeds for the account, it
    is left untouched and the function returns early. Otherwise the account
    is created and bound to the project-level roles listed below.

    https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster#use_least_privilege_service_accounts_for_your_nodes
    """
    account_email = config.gke_service_account_full_name

    # Do not alter the service account if it already exists.
    # Deleting and recreating a service account with the same name can lead to unexpected behavior
    # https://cloud.google.com/iam/docs/understanding-service-accounts#deleting_and_recreating_service_accounts
    try:
        gcloud(
            ["iam", "service-accounts", "describe", account_email],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        # A failed describe is treated as "account does not exist yet".
        already_present = False
    else:
        already_present = True

    if already_present:
        print("Service account already exists")
        return

    create_command = [
        "iam",
        "service-accounts",
        "create",
        config.gke_service_account_name,
        "--display-name=gnomAD GKE nodes",
    ]
    gcloud(create_command)

    # GKE requires logging.logWriter, monitoring.metricWriter, and monitoring.viewer
    #
    # stackdriver.resourceMetadata.writer is required for Stackdriver monitoring
    # https://cloud.google.com/monitoring/kubernetes-engine/observing
    #
    # storage.objectViewer is required to use private images in the Container Registry
    node_roles = (
        "logging.logWriter",
        "monitoring.metricWriter",
        "monitoring.viewer",
        "stackdriver.resourceMetadata.writer",
        "storage.objectViewer",
    )

    member_flag = f"--member=serviceAccount:{account_email}"
    for role_name in node_roles:
        binding_command = [
            "gcloud",
            "projects",
            "add-iam-policy-binding",
            config.project,
            member_flag,
            f"--role=roles/{role_name}",
        ]
        subprocess.check_call(binding_command, stdout=subprocess.DEVNULL)
Example #3
0
def create_node_pool(node_pool_name: str,
                     node_pool_args: typing.List[str]) -> None:
    """Create a node pool in the configured GKE cluster.

    Args:
        node_pool_name: Name given to the new node pool.
        node_pool_args: Extra ``gcloud container node-pools create`` flags,
            appended after the common flags set here.
    """
    # Flags shared by every node pool: same cluster/zone and service account,
    # auto repair/upgrade, secure boot, and legacy metadata endpoints disabled.
    common_args = [
        "container", "node-pools", "create", node_pool_name,
        f"--cluster={config.gke_cluster_name}",
        f"--zone={config.zone}",
        "--enable-autorepair",
        "--enable-autoupgrade",
        f"--service-account={config.gke_service_account_full_name}",
        "--shielded-secure-boot",
        "--metadata=disable-legacy-endpoints=true",
    ]
    gcloud(common_args + node_pool_args)
Example #4
0
def create_configmap():
    """Create the ``ingress-ip`` configmap from the reserved global address.

    Store the IP address used for the ingress load balancer in a configmap so
    that the browser can use it for determining the real client IP.
    """
    describe_command = [
        "compute",
        "addresses",
        "describe",
        config.ip_address_name,
        "--global",
        "--format=value(address)",
    ]
    ingress_ip = gcloud(describe_command)

    literal_flag = f"--from-literal=ip={ingress_ip}"
    kubectl(["create", "configmap", "ingress-ip", literal_flag])
Example #5
0
def create_network() -> None:
    """Create the custom-mode VPC network and the subnet for the GKE cluster."""
    network = config.network_name

    # Create a VPC network
    # https://cloud.google.com/vpc/docs/using-vpc
    network_command = ["compute", "networks", "create", network]
    network_command.append("--subnet-mode=custom")
    gcloud(network_command)

    # Create a subnet for the GKE cluster
    # https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#custom_subnet
    subnet_command = ["compute", "networks", "subnets", "create", f"{network}-gke"]
    subnet_command += [
        f"--network={network}",
        f"--region={config.region}",
        "--range=192.168.0.0/20",
        "--secondary-range=gke-pods=10.4.0.0/14,gke-services=10.0.32.0/20",
        "--enable-flow-logs",
        "--enable-private-ip-google-access",
    ]
    gcloud(subnet_command)
Example #6
0
def create_configmap():
    """Create the ``proxy-ips`` configmap.

    Store a list of all IP addresses involved in proxying requests.
    These are used for determining the real client IP.
    """
    # Private/internal networks
    # These ranges match those used for the gnomad-gke subnet.
    internal_ranges = (
        "127.0.0.1",
        "192.168.0.0/20",
        "10.4.0.0/14",
        "10.0.32.0/20",
    )

    # Internal IPs for GCE load balancers
    # https://cloud.google.com/load-balancing/docs/https#how-connections-work
    load_balancer_ranges = (
        "35.191.0.0/16",
        "130.211.0.0/22",
    )

    # The reserved global address is appended last.
    ingress_ip = gcloud([
        "compute",
        "addresses",
        "describe",
        config.ip_address_name,
        "--global",
        "--format=value(address)",
    ])

    fixed_ranges = ",".join(internal_ranges + load_balancer_ranges)
    ips = f"{fixed_ranges},{ingress_ip}"

    kubectl(["create", "configmap", "proxy-ips", f"--from-literal=ips={ips}"])
Example #7
0
def create_ip_address() -> None:
    """Reserve a static external IP address to use with a load balancer."""
    reserve_command = [
        "compute",
        "addresses",
        "create",
        config.ip_address_name,
        "--global",
    ]
    gcloud(reserve_command)
Example #8
0
def main(argv: typing.List[str]) -> None:
    """Interactively provision the deployment's cloud and Kubernetes resources.

    After confirming with the user, this creates (in order) the VPC network,
    the reserved IP address, the cluster service account, the GKE cluster, a
    configmap, the "redis" and "es-data" node pools, the Redis and ECK
    Kubernetes resources, a firewall rule for the ECK admission webhook, and
    the Elasticsearch snapshot and data pipeline service accounts.

    Args:
        argv: Command line arguments handed to ``argparse``. No options are
            defined, so this only provides ``--help`` and rejects anything
            else.

    Raises:
        SystemExit: If ``config.project`` is not set, or if no firewall rule
            for the GKE cluster can be found.
        subprocess.CalledProcessError: If one of the directly invoked
            ``gcloud``/``gsutil`` commands fails.
    """
    parser = argparse.ArgumentParser(prog="deployctl")

    parser.parse_args(argv)

    if not config.project:
        print("project configuration is required", file=sys.stderr)
        sys.exit(1)

    print("This will create the following resources:")
    print(f"- VPC network '{config.network_name}'")
    print(f"- IP address '{config.ip_address_name}'")
    print(f"- Router '{config.network_name}-nat-router'")
    print(f"- NAT config '{config.network_name}-nat'")
    print(f"- Service account '{config.gke_service_account_name}'")
    print(f"- GKE cluster '{config.gke_cluster_name}'")
    print("- Service account 'gnomad-es-snapshots'")
    print("- Service account 'gnomad-data-pipeline'")

    if input("Continue? (y/n) ").lower() == "y":
        print("Creating network...")
        create_network()

        print("Reserving IP address...")
        create_ip_address()

        print("Creating service account...")
        create_cluster_service_account()

        print("Creating cluster...")
        create_cluster()

        print("Creating configmap...")
        create_configmap()

        print("Creating node pools...")
        create_node_pool("redis",
                         ["--num-nodes=1", "--machine-type=e2-custom-6-49152"])

        create_node_pool("es-data", ["--machine-type=e2-highmem-8"])

        print("Creating K8S resources...")
        manifests_directory = os.path.realpath(
            os.path.join(os.path.dirname(__file__), "../../manifests"))

        kubectl(["apply", "-k", os.path.join(manifests_directory, "redis")])

        # Install Elastic Cloud on Kubernetes operator
        # https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-overview.html
        kubectl([
            "apply", "-f",
            "https://download.elastic.co/downloads/eck/1.2.1/all-in-one.yaml"
        ])

        # Configure firewall rule for ECK admission webhook
        # https://github.com/elastic/cloud-on-k8s/issues/1437
        # https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules
        gke_firewall_rule_tag_lines = gcloud([
            "compute",
            "firewall-rules",
            "list",
            f"--filter=name~^gke-{config.gke_cluster_name}",
            "--format=value(targetTags.list())",
        ]).splitlines()

        # Fail with an explicit message instead of an IndexError when the
        # listing comes back empty.
        if not gke_firewall_rule_tag_lines:
            print(
                f"no firewall rules found for GKE cluster '{config.gke_cluster_name}'",
                file=sys.stderr,
            )
            sys.exit(1)

        # The target tags of the first matching rule are reused for the new rule.
        gke_firewall_rule_target_tags = gke_firewall_rule_tag_lines[0]

        gcloud([
            "compute",
            "firewall-rules",
            "create",
            f"{config.network_name}-es-webhook",
            "--action=ALLOW",
            "--direction=INGRESS",
            f"--network={config.network_name}",
            "--rules=tcp:9443",
            "--source-ranges=172.16.0.0/28",  # Matches GKE cluster master IP range
            f"--target-tags={gke_firewall_rule_target_tags}",
        ])

        # Create a service account for Elasticsearch snapshots
        # https://www.elastic.co/guide/en/cloud-on-k8s/1.2/k8s-snapshots.html#k8s-secure-settings
        try:
            # Do not alter the service account if it already exists.
            # Deleting and recreating a service account with the same name can lead to unexpected behavior
            # https://cloud.google.com/iam/docs/understanding-service-accounts#deleting_and_recreating_service_accounts
            gcloud(
                [
                    "iam",
                    "service-accounts",
                    "describe",
                    f"gnomad-es-snapshots@{config.project}.iam.gserviceaccount.com",
                ],
                stderr=subprocess.DEVNULL,
            )
            print("Snapshot account already exists")
        except subprocess.CalledProcessError:
            gcloud([
                "iam",
                "service-accounts",
                "create",
                "gnomad-es-snapshots",
                "--display-name=gnomAD Elasticsearch snapshots",
            ])

        # Grant the snapshot service account roles/storage.admin on the snapshot bucket.
        # This runs after (not in a `finally` of) the block above so that a failed
        # account creation is reported as-is instead of being followed by a grant
        # to an account that does not exist.
        # NOTE(review): this was previously described as "object admin" access;
        # roles/storage.objectAdmin may have been intended - confirm.
        # https://cloud.google.com/storage/docs/access-control/using-iam-permissions#bucket-add
        subprocess.check_call(
            [
                "gsutil",
                "iam",
                "ch",
                f"serviceAccount:gnomad-es-snapshots@{config.project}.iam.gserviceaccount.com:roles/storage.admin",
                "gs://gnomad-browser-elasticsearch-snapshots",  # TODO: The bucket to use for snapshots should be configurable
            ],
            stdout=subprocess.DEVNULL,
        )

        # Download key for snapshots service account.
        # https://cloud.google.com/iam/docs/creating-managing-service-account-keys
        keys_directory = os.path.realpath(
            os.path.join(os.path.dirname(__file__), "../../keys"))
        if not os.path.exists(keys_directory):
            os.mkdir(keys_directory)
            # Ignore everything in the keys directory so keys are never committed.
            with open(os.path.join(keys_directory, ".gitignore"),
                      "w") as gitignore_file:
                gitignore_file.write("*")

        # Only create a new key when no key file has been downloaded yet.
        if not os.path.exists(
                os.path.join(keys_directory,
                             "gcs.client.default.credentials_file")):
            gcloud([
                "iam",
                "service-accounts",
                "keys",
                "create",
                os.path.join(keys_directory,
                             "gcs.client.default.credentials_file"),
                f"--iam-account=gnomad-es-snapshots@{config.project}.iam.gserviceaccount.com",
            ])

        # Create K8S secret with snapshots service account key.
        # The relative --from-file path is resolved against keys_directory (cwd).
        kubectl(
            [
                "create",
                "secret",
                "generic",
                "es-snapshots-gcs-credentials",
                "--from-file=gcs.client.default.credentials_file",
            ],
            cwd=keys_directory,
        )

        # Create a service account for data pipeline.
        try:
            # Do not alter the service account if it already exists.
            # Deleting and recreating a service account with the same name can lead to unexpected behavior
            # https://cloud.google.com/iam/docs/understanding-service-accounts#deleting_and_recreating_service_accounts
            gcloud(
                [
                    "iam",
                    "service-accounts",
                    "describe",
                    f"gnomad-data-pipeline@{config.project}.iam.gserviceaccount.com",
                ],
                stderr=subprocess.DEVNULL,
            )
            print("Data pipeline service account already exists")
        except subprocess.CalledProcessError:
            gcloud([
                "iam", "service-accounts", "create", "gnomad-data-pipeline",
                "--display-name=gnomAD data pipeline"
            ])

            # Grant the data pipeline service account the Dataproc worker role.
            subprocess.check_call(
                [
                    "gcloud",
                    "projects",
                    "add-iam-policy-binding",
                    config.project,
                    f"--member=serviceAccount:gnomad-data-pipeline@{config.project}.iam.gserviceaccount.com",
                    "--role=roles/dataproc.worker",
                ],
                stdout=subprocess.DEVNULL,
            )

            # serviceusage.services.use is necessary to access requester pays buckets
            subprocess.check_call(
                [
                    "gcloud",
                    "projects",
                    "add-iam-policy-binding",
                    config.project,
                    f"--member=serviceAccount:gnomad-data-pipeline@{config.project}.iam.gserviceaccount.com",
                    "--role=roles/serviceusage.serviceUsageConsumer",
                ],
                stdout=subprocess.DEVNULL,
            )

        # Grant the data pipeline service account roles/storage.admin on the data pipeline bucket.
        # As with the snapshot account, this runs after the try/except rather than in
        # a `finally`, so it is skipped when account creation or role binding failed.
        # NOTE(review): this was previously described as "object admin" access;
        # roles/storage.objectAdmin may have been intended - confirm.
        # https://cloud.google.com/storage/docs/access-control/using-iam-permissions#bucket-add
        subprocess.check_call(
            [
                "gsutil",
                "iam",
                "ch",
                f"serviceAccount:gnomad-data-pipeline@{config.project}.iam.gserviceaccount.com:roles/storage.admin",
                # TODO: This should use the same configuration as data pipeline output.
                "gs://gnomad-browser-data-pipeline",
            ],
            stdout=subprocess.DEVNULL,
        )
Example #9
0
def create_network() -> None:
    """Create the VPC network, its subnets, Cloud NAT, and Dataproc firewall rules.

    The ``gcloud`` commands are collected into an ordered sequence and then
    executed one after another.
    """
    network = config.network_name
    network_flag = f"--network={network}"
    region_flag = f"--region={config.region}"
    nat_router = f"{network}-nat-router"

    commands = (
        # Create a VPC network
        # https://cloud.google.com/vpc/docs/using-vpc
        ["compute", "networks", "create", network, "--subnet-mode=custom"],

        # Create a subnet for the GKE cluster
        # https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#custom_subnet
        [
            "compute", "networks", "subnets", "create", f"{network}-gke",
            network_flag,
            region_flag,
            "--range=192.168.0.0/20",
            "--secondary-range=gke-pods=10.4.0.0/14,gke-services=10.0.32.0/20",
            "--enable-flow-logs",
            "--enable-private-ip-google-access",
        ],

        # Create a subnet for Dataproc nodes
        [
            "compute", "networks", "subnets", "create", f"{network}-dataproc",
            network_flag,
            region_flag,
            "--range=192.168.255.0/24",
            "--enable-flow-logs",
            "--enable-private-ip-google-access",
        ],

        # Setup Cloud NAT
        # https://cloud.google.com/nat/docs/using-nat
        # This allows pulling external Docker images for Elastic
        ["compute", "routers", "create", nat_router, network_flag, region_flag],
        [
            "compute", "routers", "nats", "create", f"{network}-nat",
            f"--router={nat_router}",
            "--auto-allocate-nat-external-ips",
            "--nat-all-subnet-ip-ranges",
            "--enable-logging",
        ],

        # Allow Dataproc machines to talk to each other
        # https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/network
        # Dataproc clusters must be created with --tags=dataproc-node for this to apply
        [
            "compute", "firewall-rules", "create", f"{network}-dataproc-internal",
            "--action=ALLOW",
            "--direction=INGRESS",
            network_flag,
            "--rules=tcp:0-65535,udp:0-65535,icmp",
            "--source-tags=dataproc-node",
            "--target-tags=dataproc-node",
        ],

        # Allow SSH access to Dataproc machines from authorized networks
        # Dataproc clusters must be created with --tags=dataproc-node for this to apply
        [
            "compute", "firewall-rules", "create", f"{network}-dataproc-ssh",
            "--action=ALLOW",
            "--direction=INGRESS",
            network_flag,
            "--rules=tcp:22",
            f"--source-ranges={config.authorized_networks}",
            "--target-tags=dataproc-node",
        ],
    )

    for command in commands:
        gcloud(command)