Example #1
def deploy_init_cluster(settings):
    """Provisions a GKE cluster, persistant disks, and any other prerequisites for deployment."""

    print_separator("init-cluster")

    # initialize the VM
    node_name = get_node_name()
    if not node_name:
        raise Exception(
            "Unable to retrieve node name. Was the cluster created successfully?"
        )

    if settings["DEPLOY_TO_PREFIX"] == "gcloud":
        run("gcloud config set project %(GCLOUD_PROJECT)s" % settings)

        # create private network so that dataproc jobs can connect to GKE cluster nodes
        # based on: https://medium.com/@DazWilkin/gkes-cluster-ipv4-cidr-flag-69d25884a558
        create_vpc(gcloud_project="%(GCLOUD_PROJECT)s" % settings,
                   network_name="%(GCLOUD_PROJECT)s-auto-vpc" % settings)

        # create cluster
        run(
            " ".join([
                "gcloud container clusters create %(CLUSTER_NAME)s",
                "--project %(GCLOUD_PROJECT)s",
                "--zone %(GCLOUD_ZONE)s",
                "--machine-type %(CLUSTER_MACHINE_TYPE)s",
                "--num-nodes 1",
                #"--network %(GCLOUD_PROJECT)s-auto-vpc",
                #"--local-ssd-count 1",
                "--scopes",
                "https://www.googleapis.com/auth/devstorage.read_write"
            ]) % settings,
            verbose=False,
            errors_to_ignore=["already exists"])

        # create the remaining cluster nodes, breaking them up into node pools of several machines each.
        # This way, the cluster can be scaled up and down when needed using the technique in
        #    https://github.com/mattsolo1/gnomadjs/blob/master/cluster/elasticsearch/Makefile#L23
        #
        i = 0
        num_nodes_remaining_to_create = int(settings["CLUSTER_NUM_NODES"]) - 1
        num_nodes_per_node_pool = int(settings["NUM_NODES_PER_NODE_POOL"])
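        # One node already exists from the "clusters create" call above, so only
        # CLUSTER_NUM_NODES - 1 more nodes are created here, in pools of at most
        # NUM_NODES_PER_NODE_POOL nodes each. For example, CLUSTER_NUM_NODES=6 with
        # NUM_NODES_PER_NODE_POOL=4 yields node pools <CLUSTER_NAME>-1 (4 nodes)
        # and <CLUSTER_NAME>-2 (1 node).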
        while num_nodes_remaining_to_create > 0:
            i += 1
            run(
                " ".join([
                    "gcloud beta container node-pools create %(CLUSTER_NAME)s-"
                    + str(i),
                    "--cluster %(CLUSTER_NAME)s",
                    "--project %(GCLOUD_PROJECT)s",
                    "--zone %(GCLOUD_ZONE)s",
                    "--machine-type %(CLUSTER_MACHINE_TYPE)s",
                    "--num-nodes %s" % min(num_nodes_per_node_pool,
                                           num_nodes_remaining_to_create),
                    #"--network %(GCLOUD_PROJECT)s-auto-vpc",
                    #"--local-ssd-count 1",
                    "--scopes",
                    "https://www.googleapis.com/auth/devstorage.read_write"
                ]) % settings,
                verbose=False,
                errors_to_ignore=["already exists"])

            num_nodes_remaining_to_create -= num_nodes_per_node_pool

        run(" ".join([
            "gcloud container clusters get-credentials %(CLUSTER_NAME)s",
            "--project %(GCLOUD_PROJECT)s",
            "--zone %(GCLOUD_ZONE)s",
        ]) % settings)

        # create the ssd storage class used for elasticsearch disks
        run(" ".join([
            "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/ssd-storage-class.yaml"
            % settings,
        ]))

        #run(" ".join([
        #    "gcloud compute disks create %(CLUSTER_NAME)s-elasticsearch-disk-0  --type=pd-ssd --zone=us-central1-b --size=%(ELASTICSEARCH_DISK_SIZE)sGi" % settings,
        #]), errors_to_ignore=["already exists"])

        #run(" ".join([
        #    "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/es-persistent-volume.yaml" % settings,
        #]))

        # if the cluster was already created previously, update its size to match CLUSTER_NUM_NODES
        #run(" ".join([
        #    "gcloud container clusters resize %(CLUSTER_NAME)s --size %(CLUSTER_NUM_NODES)s" % settings,
        #]), is_interactive=True)

        # create persistent disks
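        # the disk-size settings key is derived from the label, e.g. "postgres" -> POSTGRES_DISK_SIZE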
        for label in ("postgres",):  # other disk labels ("mongo", "elasticsearch-sharded", "elasticsearch") are currently disabled
            run(" ".join([
                "gcloud compute disks create",
                "--zone %(GCLOUD_ZONE)s",
                "--size %(" + label.upper().replace("-", "_") + "_DISK_SIZE)s",
                "%(CLUSTER_NAME)s-" + label + "-disk",
            ]) % settings,
                verbose=True,
                errors_to_ignore=["already exists"])
    elif settings["DEPLOY_TO"] == "kube-solo":
        run("mkdir -p %(POSTGRES_DBPATH)s" % settings)
        run("mkdir -p %(MONGO_DBPATH)s" % settings)
        run("mkdir -p %(ELASTICSEARCH_DBPATH)s" % settings)
    elif settings["DEPLOY_TO"] == "minikube":
        # fix time sync issues on macOS which could interfere with token auth (https://github.com/kubernetes/minikube/issues/1378)
        run("minikube ssh -- docker run -i --rm --privileged --pid=host debian nsenter -t 1 -m -u -n -i date -u $(date -u +%m%d%H%M%Y)"
            )
    else:
        raise ValueError("Unexpected DEPLOY_TO_PREFIX: %(DEPLOY_TO_PREFIX)s" %
                         settings)

    # set VM settings required for elasticsearch
    if settings["DEPLOY_TO"] == "minikube":
        run("minikube ssh 'sudo /sbin/sysctl -w vm.max_map_count=262144'" %
            locals())
    elif settings["DEPLOY_TO"] == "kube-solo":
        run("corectl ssh %(node_name)s \"sudo /sbin/sysctl -w vm.max_map_count=262144\""
            % locals())

    #else:
    #    run(" ".join([
    #        "gcloud compute ssh "+node_name,
    #        "--zone %(GCLOUD_ZONE)s",
    #        "--command \"sudo /sbin/sysctl -w vm.max_map_count=262144\""
    #    ]) % settings)

    # print cluster info
    run("kubectl cluster-info", verbose=True)
Example #2
def deploy_init_cluster(settings):
    """Provisions a GKE cluster, persistant disks, and any other prerequisites for deployment."""

    print_separator("init-cluster")

    if settings["DEPLOY_TO_PREFIX"] == "gcloud":
        run("gcloud config set project %(GCLOUD_PROJECT)s" % settings)

        # create private network so that dataproc jobs can connect to GKE cluster nodes
        # based on: https://medium.com/@DazWilkin/gkes-cluster-ipv4-cidr-flag-69d25884a558
        create_vpc(gcloud_project="%(GCLOUD_PROJECT)s" % settings, network_name="%(GCLOUD_PROJECT)s-auto-vpc" % settings)

        # create cluster
        run(" ".join([
            "gcloud container clusters create %(CLUSTER_NAME)s",
            "--project %(GCLOUD_PROJECT)s",
            "--zone %(GCLOUD_ZONE)s",
            "--machine-type %(CLUSTER_MACHINE_TYPE)s",
            "--num-nodes %(CLUSTER_NUM_NODES)s",
            #"--network %(GCLOUD_PROJECT)s-auto-vpc",
            #"--local-ssd-count 1",
            "--scopes", "https://www.googleapis.com/auth/devstorage.read_write"
        ]) % settings, verbose=False, errors_to_ignore=["already exists"])

        run(" ".join([
            "gcloud container clusters get-credentials %(CLUSTER_NAME)s",
            "--project %(GCLOUD_PROJECT)s",
            "--zone %(GCLOUD_ZONE)s",
        ]) % settings)

        # create the ssd storage class used for elasticsearch disks
        run(" ".join([
            "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/ssd-storage-class.yaml" % settings,
        ]))

        #run(" ".join([
        #    "gcloud compute disks create %(CLUSTER_NAME)s-elasticsearch-disk-0  --type=pd-ssd --zone=us-central1-b --size=%(ELASTICSEARCH_DISK_SIZE)sGi" % settings,
        #]), errors_to_ignore=["already exists"])

        #run(" ".join([
        #    "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/es-persistent-volume.yaml" % settings,
        #]))


        # if the cluster was already created previously, update its size to match CLUSTER_NUM_NODES
        #run(" ".join([
        #    "gcloud container clusters resize %(CLUSTER_NAME)s --size %(CLUSTER_NUM_NODES)s" % settings,
        #]), is_interactive=True)

        # create persistent disks
        for label in ("postgres",): # "mongo"): # , "elasticsearch-sharded"):  # "elasticsearch"
            run(" ".join([
                    "gcloud compute disks create",
                    "--zone %(GCLOUD_ZONE)s",
                    "--size %("+label.upper().replace("-", "_")+"_DISK_SIZE)s",
                    "%(CLUSTER_NAME)s-"+label+"-disk",
                ]) % settings, verbose=True, errors_to_ignore=["already exists"])
    else:
        run("mkdir -p %(POSTGRES_DBPATH)s" % settings)
        run("mkdir -p %(MONGO_DBPATH)s" % settings)
        run("mkdir -p %(ELASTICSEARCH_DBPATH)s" % settings)

    # initialize the VM
    node_name = get_node_name()
    if not node_name:
        raise Exception("Unable to retrieve node name. Was the cluster created successfully?")

    # set VM settings required for elasticsearch
    if settings["DEPLOY_TO"] == "local":
        run("corectl ssh %(node_name)s \"sudo /sbin/sysctl -w vm.max_map_count=262144\"" % locals())

    #else:
    #    run(" ".join([
    #        "gcloud compute ssh "+node_name,
    #        "--zone %(GCLOUD_ZONE)s",
    #        "--command \"sudo /sbin/sysctl -w vm.max_map_count=262144\""
    #    ]) % settings)

    # deploy ConfigMap file so that settings key/values can be added as environment variables in each of the pods
    #with open(os.path.join(output_dir, "deploy/kubernetes/all-settings.properties"), "w") as f:
    #    for key, value in settings.items():
    #        f.write("%s=%s\n" % (key, value))

    #run("kubectl delete configmap all-settings")
    #run("kubectl create configmap all-settings --from-file=deploy/kubernetes/all-settings.properties")
    #run("kubectl get configmaps all-settings -o yaml")

    run("kubectl cluster-info", verbose=True)