Example #1
def deploy_init_cluster(settings):
    """Provisions a GKE cluster, persistent disks, and any other prerequisites for deployment."""

    print_separator("init-cluster")

    # initialize the VM
    if settings["DEPLOY_TO"] == "minikube":
        _init_cluster_minikube(settings)
    elif settings["DEPLOY_TO_PREFIX"] == "gcloud":
        _init_cluster_gcloud(settings)
    else:
        raise ValueError("Unexpected DEPLOY_TO_PREFIX: %(DEPLOY_TO_PREFIX)s" % settings)

    node_name = get_node_name()
    if not node_name:
        raise Exception("Unable to retrieve node name. Was the cluster created successfully?")

    set_environment(settings["DEPLOY_TO"])

    create_namespace(settings)

    # print cluster info
    run("kubectl cluster-info", verbose=True)

    # wait for the cluster to initialize
    for retry_i in range(1, 5):
        try:
            deploy_settings(settings)
            break
        except RuntimeError as e:
            logger.error(("Error when deploying config maps: %(e)s. This sometimes happens when cluster is "
                          "initializing. Retrying...") % locals())
            time.sleep(5)
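
The retry loop above is a recurring pattern in these deployment scripts. A minimal generic sketch of the same idea (the helper name and defaults are illustrative, not part of the original code):

import logging
import time

logger = logging.getLogger(__name__)

def retry_call(func, attempts=4, delay_seconds=5, retry_on=(RuntimeError,)):
    """Call func() up to `attempts` times, sleeping between failed attempts."""
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except retry_on as e:
            logger.error("Attempt %s of %s failed: %s. Retrying...", attempt, attempts, e)
            if attempt < attempts:
                time.sleep(delay_seconds)
    raise RuntimeError("All %s attempts failed" % attempts)

# e.g. retry_call(lambda: deploy_settings(settings))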
Example #2
def deploy_secrets(settings):
    """Deploys or updates k8s secrets."""

    print_separator("secrets")

    # deploy secrets
    for secret in ["seqr-secrets", "postgres-secrets", "nginx-secrets", "matchbox-secrets"]:
        run("kubectl delete secret " + secret, verbose=False, errors_to_ignore=["not found"])

    run(" ".join([
        "kubectl create secret generic seqr-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/seqr/django_key",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/seqr/omim_key",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/seqr/postmark_server_token",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/seqr/mme_node_admin_token",
    ]) % settings)

    run(" ".join([
        "kubectl create secret generic postgres-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/postgres/postgres.username",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/postgres/postgres.password",
    ]) % settings)

    run(" ".join([
        "kubectl create secret generic nginx-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/nginx/tls.key",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/nginx/tls.crt",
    ]) % settings)

    run(" ".join([
        "kubectl create secret generic matchbox-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/matchbox/nodes.json",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/matchbox/application.properties",
        "--from-file deploy/secrets/%(DEPLOY_TO)s/matchbox/config.xml",
    ]) % settings)
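
For reference, the "%(KEY)s" placeholders in these commands are filled in from the settings dict with Python %-formatting. With an illustrative DEPLOY_TO value, the first create call expands roughly as follows (a sketch, not output from a real deployment):

settings = {"DEPLOY_TO": "gcloud-dev"}  # made-up value for illustration
command = " ".join([
    "kubectl create secret generic seqr-secrets",
    "--from-file deploy/secrets/%(DEPLOY_TO)s/seqr/django_key",
]) % settings
print(command)
# kubectl create secret generic seqr-secrets --from-file deploy/secrets/gcloud-dev/seqr/django_key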
Example #3
def run_in_pod(pod_name,
               command,
               deployment_target=None,
               errors_to_ignore=None,
               verbose=False,
               is_interactive=False):
    """Runs a kubernetes command to execute an arbitrary linux command string on the given pod.

    Args:
        pod_name (string): keyword to use for looking up a kubernetes pod (eg. 'phenotips' or 'nginx')
        command (string): the command to execute.
        is_interactive (bool): whether the command expects input from the user
    """

    if pod_name in DEPLOYABLE_COMPONENTS:
        full_pod_name = get_pod_name(pod_name,
                                     deployment_target=deployment_target)
        if not full_pod_name:
            raise ValueError(
                "No '%(pod_name)s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
                % locals())
    else:
        full_pod_name = pod_name

    it_arg = "-it" if is_interactive else ""
    run("kubectl exec %(it_arg)s %(full_pod_name)s -- %(command)s" % locals(),
        errors_to_ignore=errors_to_ignore,
        verbose=verbose,
        is_interactive=is_interactive)
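
A hedged usage sketch, assuming a "seqr" pod is already deployed and kubectl points at the right cluster (the command and deployment target are illustrative):

run_in_pod(
    "seqr",                          # resolved to the full pod name via get_pod_name()
    "ls /seqr",                      # illustrative command to execute inside the pod
    deployment_target="gcloud-dev",  # illustrative target
    verbose=True,
)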
Example #4
def docker_build(component_label, settings, custom_build_args=()):
    params = dict(settings)   # make a copy before modifying
    params["COMPONENT_LABEL"] = component_label
    params["DOCKER_IMAGE_NAME"] = "%(DOCKER_IMAGE_PREFIX)s/%(COMPONENT_LABEL)s" % params

    docker_command_prefix = "eval $(minikube docker-env); " if settings["DEPLOY_TO"] == "minikube" else ""

    docker_tags = set([
        "",
        ":latest",
        "%(DOCKER_IMAGE_TAG)s" % params,
        ])

    if not settings["BUILD_DOCKER_IMAGES"]:
        logger.info("Skipping docker build step. Use --build-docker-image to build a new image (and --force to build from the beginning)")
    else:
        docker_build_command = docker_command_prefix
        docker_build_command += "docker build deploy/docker/%(COMPONENT_LABEL)s/ "
        docker_build_command += (" ".join(custom_build_args) + " ")
        if settings["FORCE_BUILD_DOCKER_IMAGES"]:
            docker_build_command += "--no-cache "

        for tag in docker_tags:
            docker_image_name_with_tag = params["DOCKER_IMAGE_NAME"] + tag
            docker_build_command += "-t %(docker_image_name_with_tag)s " % locals()

        run(docker_build_command % params, verbose=True)

    if settings["PUSH_TO_REGISTRY"]:
        for tag in docker_tags:
            docker_image_name_with_tag = params["DOCKER_IMAGE_NAME"] + tag
            docker_push_command = docker_command_prefix
            docker_push_command += "docker push %(docker_image_name_with_tag)s" % locals()
            run(docker_push_command, verbose=True)
            logger.info("==> Finished uploading image: %(docker_image_name_with_tag)s" % locals())
Example #6
def set_environment(deployment_target):
    """Configure the shell environment to point to the given deployment_target using 'gcloud config set-context' and other commands.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """

    settings = collections.OrderedDict()
    load_settings([
        "deploy/kubernetes/shared-settings.yaml",
        "deploy/kubernetes/%(deployment_target)s-settings.yaml" % locals(),
    ], settings)

    if deployment_target.startswith("gcloud"):
        os.environ["KUBECONFIG"] = os.path.expanduser("~/.kube/config")
        run("gcloud config set core/project %(GCLOUD_PROJECT)s" % settings,
            print_command=True)
        run("gcloud config set compute/zone %(GCLOUD_ZONE)s" % settings,
            print_command=True)
        run("gcloud container clusters get-credentials --zone=%(GCLOUD_ZONE)s %(CLUSTER_NAME)s"
            % settings,
            print_command=True)
    elif deployment_target == "minikube":
        run("kubectl config use-context minikube", print_command=True)
    else:
        raise ValueError("Unexpected deployment_target value: %s" %
                         (deployment_target, ))

    run("kubectl config set-context $(kubectl config current-context) --namespace=%(NAMESPACE)s"
        % settings)
Example #8
def copy_files_to_or_from_pod(component,
                              deployment_target,
                              source_path,
                              dest_path,
                              direction=1):
    """Copy file(s) to or from the given component.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        source_path (string): source file path. If copying files to the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        dest_path (string): destination file path. If copying files from the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        direction (int): If > 0 the file will be copied to the pod. If < 0, then it will be copied from the pod.
    """
    full_pod_name = get_pod_name(component,
                                 deployment_target=deployment_target)
    if not full_pod_name:
        raise ValueError(
            "No '%(pod_name)s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
            % locals())

    if direction < 0:  # copy from pod
        source_path = "%s:%s" % (full_pod_name, source_path)
    elif direction > 0:  # copy to pod
        dest_path = "%s:%s" % (full_pod_name, dest_path)

    run("kubectl cp '%(source_path)s' '%(dest_path)s'" % locals())
def create_namespace(settings):
    run("kubectl create -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/namespace.yaml"
        % settings,
        errors_to_ignore=["already exists"])

    # switch kubectl to use the new namespace
    run("kubectl config set-context $(kubectl config current-context) --namespace=%(NAMESPACE)s"
        % settings)
Example #10
def copy_google_bucket_file(source_path, destination_path):
    """Copy file to or from a google bucket"""

    try:
        run("gsutil -m cp -P %(source_path)s %(destination_path)s" % locals())
    except RuntimeError as e:
        raise ValueError("Failed to copy %s %s: %s" %
                         (source_path, destination_path, e))
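
An illustrative round trip with made-up paths:

copy_google_bucket_file("data/sample.vcf.gz", "gs://my-seqr-bucket/sample.vcf.gz")   # local -> bucket
copy_google_bucket_file("gs://my-seqr-bucket/sample.vcf.gz", "/tmp/sample.vcf.gz")   # bucket -> local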
Example #11
def deploy_seqr(settings):
    print_separator("seqr")

    if settings["BUILD_DOCKER_IMAGES"]:
        seqr_git_hash = run("git log -1 --pretty=%h", errors_to_ignore=["Not a git repository"])
        seqr_git_hash = (":" + seqr_git_hash.strip()) if seqr_git_hash is not None else ""

        docker_build("seqr",
                     settings,
                     [
                         "--build-arg SEQR_SERVICE_PORT=%s" % settings["SEQR_SERVICE_PORT"],
                         "--build-arg SEQR_UI_DEV_PORT=%s" % settings["SEQR_UI_DEV_PORT"],
                         "-f deploy/docker/seqr/Dockerfile",
                         "-t %(DOCKER_IMAGE_NAME)s" + seqr_git_hash,
                         ]
                     )

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    restore_seqr_db_from_backup = settings.get("RESTORE_SEQR_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]
    postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("seqr", settings)
    elif reset_db or restore_seqr_db_from_backup:
        seqr_pod_name = get_pod_name('seqr', deployment_target=deployment_target)
        if seqr_pod_name:
            sleep_until_pod_is_running("seqr", deployment_target=deployment_target)

            run_in_pod(seqr_pod_name, "/usr/local/bin/stop_server.sh", verbose=True)

    if reset_db:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'",
                   errors_to_ignore=["does not exist"],
                   verbose=True,
                   )

    if restore_seqr_db_from_backup:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'",
                   errors_to_ignore=["does not exist"],
                   verbose=True,
                   )
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'", verbose=True)
        run("kubectl cp '%(restore_seqr_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "/root/restore_database_backup.sh postgres seqrdb /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "rm /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
    else:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'",
                   errors_to_ignore=["already exists"],
                   verbose=True,
                   )

    deploy_pod("seqr", settings, wait_until_pod_is_ready=True)
Example #13
    def run_hail(self, script_path, *script_args):
        """Runs the hail script locally."""

        if not os.path.isfile(script_path):
            raise ValueError("Script file not found: %(script_path)s" %
                             locals())

        script_args_string = " ".join(script_args)
        run("python %(script_path)s -- %(script_args_string)s" % locals())
def deploy_external_connector(settings, connector_name):
    if connector_name not in ["mongo", "elasticsearch"]:
        raise ValueError("Invalid connector name: %s" % connector_name)

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    print_separator("external-%s-connector" % connector_name)

    run(("kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/external-connectors/" % settings) + "external-%(connector_name)s.yaml" % locals())
def deploy_kube_scan(settings):
    print_separator("kube-scan")

    if settings["DELETE_BEFORE_DEPLOY"]:
        run("kubectl apply -f https://raw.githubusercontent.com/octarinesec/kube-scan/master/kube-scan.yaml")

        if settings["ONLY_PUSH_TO_REGISTRY"]:
            return

    run("kubectl apply -f https://raw.githubusercontent.com/octarinesec/kube-scan/master/kube-scan.yaml")
Example #16
    def init_runner(self,
                    genome_version,
                    machine_type="n1-highmem-4",
                    num_workers=2,
                    num_preemptible_workers=5,
                    synchronous=False):
        """Create a data-proc cluster.

        Args:
            genome_version (string): "37" or "38"
            machine_type (string): google cloud machine type
            num_workers (int):
            num_preemptible_workers (int):
            synchronous (bool): Whether to wait until the cluster is created before returning.
        """

        cluster_id = self.cluster_id
        genome_version_label = "GRCh%s" % genome_version

        # gs://hail-common/vep/vep/GRCh%(genome_version)s/vep85-GRCh%(genome_version)s-init.sh
        run(" ".join([
            "gcloud dataproc clusters create %(cluster_id)s",
            "--project",
            GCLOUD_PROJECT,
            "--zone",
            GCLOUD_ZONE,
            "--master-machine-type",
            machine_type,
            "--master-boot-disk-size 100",
            "--num-workers 2",
            "--worker-machine-type",
            machine_type,
            "--worker-boot-disk-size 100",
            "--num-preemptible-workers %(num_preemptible_workers)s",
            "--image-version 1.1",
            "--properties",
            "spark:spark.driver.extraJavaOptions=-Xss4M,spark:spark.executor.extraJavaOptions=-Xss4M,spark:spark.driver.memory=45g,spark:spark.driver.maxResultSize=30g,spark:spark.task.maxFailures=20,spark:spark.yarn.executor.memoryOverhead=30,spark:spark.kryoserializer.buffer.max=1g,hdfs:dfs.replication=1",
            "--initialization-actions",
            "gs://hail-common/hail-init.sh,gs://hail-common/vep/vep/%(genome_version_label)s/vep85-%(genome_version_label)s-init.sh",
        ]) % locals()).wait()

        # wait for cluster to initialize. The reason this loop is necessary even when
        # "gcloud dataproc clusters create" is run without --async is that the dataproc clusters
        # create command exits with an error if the cluster already exists, even if it's not in a
        # RUNNING state. This loop makes sure that the cluster is Running before proceeding.
        if synchronous:
            while True:
                cluster_status = self._get_dataproc_cluster_status()
                logger.info(
                    "waiting for cluster %(cluster_id)s - current status: [%(cluster_status)s]"
                    % locals())
                if cluster_status == "RUNNING":
                    logger.info("cluster status: [%s]" % (cluster_status, ))
                    break
                time.sleep(5)
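
A hedged sketch of creating and later tearing down the Dataproc cluster, assuming `runner` is an instance of the class these methods belong to:

runner.init_runner("38", machine_type="n1-highmem-4", num_workers=2, synchronous=True)
# ... submit hail jobs via runner.run_hail(...) ...
runner.delete_runner(synchronous=False)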
Example #18
def troubleshoot_component(component, deployment_target):
    """Runs kubectl command to print detailed debug output for the given component.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """

    pod_name = get_pod_name(component, deployment_target=deployment_target)

    run("kubectl get pods -o yaml %(pod_name)s" % locals(), verbose=True)
Example #19
def _deploy_pod(component_label, settings, wait_until_pod_is_running=True, wait_until_pod_is_ready=False):
    run(" ".join([
        "kubectl apply",
        "-f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/"+component_label+"/"+component_label+".%(DEPLOY_TO_PREFIX)s.yaml"
    ]) % settings)

    if wait_until_pod_is_running:
        _wait_until_pod_is_running(component_label, deployment_target=settings["DEPLOY_TO"])

    if wait_until_pod_is_ready:
        _wait_until_pod_is_ready(component_label, deployment_target=settings["DEPLOY_TO"])
def delete_pod(component_label, settings, custom_yaml_filename=None):
    deployment_target = settings["DEPLOY_TO"]

    yaml_filename = custom_yaml_filename or (component_label+".%(DEPLOY_TO_PREFIX)s.yaml")

    if is_pod_running(component_label, deployment_target):
        run(" ".join([
            "kubectl delete",
            "-f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/"+component_label+"/"+yaml_filename,
            ]) % settings, errors_to_ignore=["not found"])

    logger.info("waiting for \"%s\" to exit Running status" % component_label)
    while is_pod_running(component_label, deployment_target):
        time.sleep(5)
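
Taken together, re-deploying a single component is typically "delete, then apply"; an illustrative sequence using the two helpers above:

delete_pod("seqr", settings)                                  # tear down the running pod, if any
_deploy_pod("seqr", settings, wait_until_pod_is_ready=True)   # re-apply the yaml and wait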
Example #22
def deploy_seqr(settings):
    print_separator("seqr")

    docker_build(
        "seqr",
        settings,
        [
            "--build-arg SEQR_SERVICE_PORT=%s" % settings["SEQR_SERVICE_PORT"],
            "--build-arg SEQR_UI_DEV_PORT=%s" % settings["SEQR_UI_DEV_PORT"],
            "-f deploy/docker/%(COMPONENT_LABEL)s/%(DEPLOY_TO_PREFIX)s/Dockerfile"
        ],
    )

    restore_seqr_db_from_backup = settings.get("RESTORE_SEQR_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]
    postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("seqr", settings)
    elif reset_db or restore_seqr_db_from_backup:
        seqr_pod_name = get_pod_name('seqr', deployment_target=deployment_target)
        if seqr_pod_name:
            _wait_until_pod_is_running("seqr", deployment_target=deployment_target)

            run_in_pod(seqr_pod_name, "/usr/local/bin/stop_server.sh" % locals(), verbose=True)

    if reset_db:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'" % locals(),
            errors_to_ignore=["does not exist"],
            verbose=True,
        )

    if restore_seqr_db_from_backup:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'" % locals(),
            errors_to_ignore=["does not exist"],
            verbose=True,
        )
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'" % locals(), verbose=True)
        run("kubectl cp '%(restore_seqr_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "/root/restore_database_backup.sh postgres seqrdb /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "rm /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
    else:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'" % locals(),
            errors_to_ignore=["already exists"],
            verbose=True,
        )

    _deploy_pod("seqr", settings, wait_until_pod_is_ready=True)
Example #24
def deploy_pod(component_label, settings, wait_until_pod_is_running=True, wait_until_pod_is_ready=False):
    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    run(" ".join([
        "kubectl apply",
        "-f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/"+component_label+"/"+component_label+".%(DEPLOY_TO_PREFIX)s.yaml"
    ]) % settings)

    if wait_until_pod_is_running:
        sleep_until_pod_is_running(component_label, deployment_target=settings["DEPLOY_TO"])

    if wait_until_pod_is_ready:
        sleep_until_pod_is_ready(component_label, deployment_target=settings["DEPLOY_TO"])
Example #25
def set_environment(deployment_target):
    """Configure the shell environment to point to the given deployment_target.

    Args:
        deployment_target (string): "minikube", "gcloud-dev", etc. See constants.DEPLOYMENT_TARGETS.
    """
    if deployment_target.startswith("gcloud"):
        settings = retrieve_settings(deployment_target)

        os.environ["KUBECONFIG"] = os.path.expanduser("~/.kube/config")
        run("gcloud config set core/project %(GCLOUD_PROJECT)s" % settings,
            print_command=True)
        run("gcloud config set compute/zone %(GCLOUD_ZONE)s" % settings,
            print_command=True)
        run("gcloud container clusters get-credentials --zone=%(GCLOUD_ZONE)s %(CLUSTER_NAME)s"
            % settings,
            print_command=True)
    elif deployment_target == "minikube":
        run("kubectl config use-context minikube", print_command=True)
    elif deployment_target == "kube-solo":
        os.environ["KUBECONFIG"] = os.path.expanduser(
            "~/kube-solo/kube/kubeconfig")
    else:
        raise ValueError("Unexpected deployment_target value: %s" %
                         (deployment_target, ))
    def delete_runner(self, synchronous=False):
        """Delete the dataproc cluster created by self._create_dataproc_cluster(..)

        Args:
            synchronous (bool): Whether to wait for the deletion operation to complete before returning
        """
        cluster_id = self.cluster_id
        async_arg = "" if synchronous else "--async"

        run(" ".join([
            "gcloud dataproc clusters delete %(cluster_id)s",
            "--project",
            GCLOUD_PROJECT,
            "--quiet",
        ]) % locals()).wait()
Example #27
def get_google_bucket_file_stats(gs_path):
    gsutil_stat_output = run(_gsutil_command('stat', gs_path), verbose=False)
    """
    Example gsutil stat output:

    Creation time:          Fri, 09 Jun 2017 09:36:23 GMT
    Update time:            Fri, 09 Jun 2017 09:36:23 GMT
    Storage class:          REGIONAL
    Content-Length:         363620675
    Content-Type:           text/x-vcard
    Hash (crc32c):          SWOktA==
    Hash (md5):             fEdIumyOFR7HvULeAwXCwQ==
    ETag:                   CMae+J67sNQCEAE=
    Generation:             1497000983793478
    Metageneration:         1
    """

    if not gsutil_stat_output:
        return None

    EMPTY_MATCH_OBJ = re.match("()", "")
    DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    creation_time = (re.search("Creation.time:[\s]+(.+)", gsutil_stat_output,
                               re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    update_time = (re.search("Update.time:[\s]+(.+)", gsutil_stat_output,
                             re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    file_size = (re.search("Content-Length:[\s]+(.+)", gsutil_stat_output,
                           re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    file_md5 = (re.search("Hash (md5):[\s]+(.+)", gsutil_stat_output,
                          re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)

    ctime = time.mktime(time.strptime(creation_time, DATE_FORMAT))
    mtime = time.mktime(time.strptime(update_time, DATE_FORMAT))
    return FileStats(ctime=ctime, mtime=mtime, size=file_size, md5=file_md5)
def get_google_bucket_file_stats(gs_path):
    gsutil_stat_output = run("gsutil stat %(gs_path)s" % locals(), verbose=False)

    """
    Example gsutil stat output:

    Creation time:          Fri, 09 Jun 2017 09:36:23 GMT
    Update time:            Fri, 09 Jun 2017 09:36:23 GMT
    Storage class:          REGIONAL
    Content-Length:         363620675
    Content-Type:           text/x-vcard
    Hash (crc32c):          SWOktA==
    Hash (md5):             fEdIumyOFR7HvULeAwXCwQ==
    ETag:                   CMae+J67sNQCEAE=
    Generation:             1497000983793478
    Metageneration:         1
    """

    if not gsutil_stat_output:
        return None

    EMPTY_MATCH_OBJ = re.match("()", "")
    DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    creation_time = (re.search("Creation.time:[\s]+(.+)", gsutil_stat_output, re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    update_time = (re.search("Update.time:[\s]+(.+)", gsutil_stat_output, re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    file_size = (re.search("Content-Length:[\s]+(.+)", gsutil_stat_output, re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)
    file_md5 = (re.search("Hash (md5):[\s]+(.+)", gsutil_stat_output, re.IGNORECASE) or EMPTY_MATCH_OBJ).group(1)

    ctime = time.mktime(time.strptime(creation_time, DATE_FORMAT))
    mtime = time.mktime(time.strptime(update_time, DATE_FORMAT))
    return FileStats(ctime=ctime, mtime=mtime, size=file_size, md5=file_md5)
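
Illustrative usage (the bucket path is made up; FileStats is the record type returned above):

stats = get_google_bucket_file_stats("gs://my-seqr-bucket/sample.vcf.gz")
if stats is not None:
    print("size: %s bytes, last modified: %s" % (stats.size, time.ctime(stats.mtime)))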
Example #29
def _get_resource_info(
        resource_type="pod",
        labels={},
        json_path=".items[0].metadata.name",
        errors_to_ignore=("array index out of bounds: index 0",),
        verbose=False,
    ):
    """Runs 'kubectl get <resource_type>' command to retrieve info about this resource.

    Args:
        resource_type (string): "pod", "service", etc.
        labels (dict): (eg. {'name': 'phenotips'})
        json_path (string): a json path query string (eg. ".items[0].metadata.name")
        errors_to_ignore (list):
        verbose (bool):
    Returns:
        (string) resource value (eg. "postgres-410765475-1vtkn")
    """

    l_arg = ""
    if labels:
        l_arg = "-l" + ",".join(["%s=%s" % (key, value) for key, value in labels.items()])

    output = run(
        "kubectl get %(resource_type)s %(l_arg)s -o jsonpath={%(json_path)s}" % locals(),
        errors_to_ignore=errors_to_ignore,
        print_command=False,
        verbose=verbose,
    )

    return output.strip('\n') if output is not None else None
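
An illustrative call that resolves the full name of a running pod by label (the label value mirrors the docstring example):

pod_name = _get_resource_info(
    resource_type="pod",
    labels={"name": "phenotips"},
    json_path=".items[0].metadata.name",
)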
def copy_files_to_or_from_pod(component, deployment_target, source_path, dest_path, direction=1):
    """Copy file(s) to or from the given component.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        source_path (string): source file path. If copying files to the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        dest_path (string): destination file path. If copying files from the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        direction (int): If > 0 the file will be copied to the pod. If < 0, then it will be copied from the pod.
    """
    full_pod_name = get_pod_name(component, deployment_target=deployment_target)
    if not full_pod_name:
        raise ValueError("No '%(pod_name)s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?" % locals())

    if direction < 0:  # copy from pod
        source_path = "%s:%s" % (full_pod_name, source_path)
    elif direction > 0: # copy to pod
        dest_path = "%s:%s" % (full_pod_name, dest_path)

    run("kubectl cp '%(source_path)s' '%(dest_path)s'" % locals())
    def _get_dataproc_cluster_status(self):
        """Return cluster status (eg. "CREATING", "RUNNING", etc."""
        cluster_id = self.cluster_id

        output = run(" ".join([
            "gcloud dataproc clusters list ", "--project", GCLOUD_PROJECT,
            "--filter", "'clusterName=%(cluster_id)s'", "--format",
            "'value(status.state)'"
        ]) % locals(),
                     verbose=False)

        return output.strip()
Example #32
def create_vpc(gcloud_project, network_name):
    run(
        " ".join([
            #"gcloud compute networks create seqr-project-custom-vpc --project=%(GCLOUD_PROJECT)s --mode=custom"
            "gcloud compute networks create %(network_name)s",
            "--project=%(gcloud_project)s",
            "--mode=auto"
        ]) % locals(),
        errors_to_ignore=["already exists"])

    # add recommended firewall rules to enable ssh, etc.
    run(" ".join([
        "gcloud compute firewall-rules create custom-vpc-allow-tcp-udp-icmp",
        "--project %(gcloud_project)s",
        "--network %(network_name)s",
        "--allow tcp,udp,icmp",
        "--source-ranges 10.0.0.0/8",
    ]) % locals(),
        errors_to_ignore=["already exists"])

    run(" ".join([
        "gcloud compute firewall-rules create custom-vpc-allow-ports",
        "--project %(gcloud_project)s",
        "--network %(network_name)s",
        "--allow tcp:22,tcp:3389,icmp",
        "--source-ranges 10.0.0.0/8",
    ]) % locals(),
        errors_to_ignore=["already exists"])
Example #33
def docker_build(component_label, settings, custom_build_args=[]):
    settings = dict(settings)  # make a copy before modifying
    settings["COMPONENT_LABEL"] = component_label

    init_env_command = ""
    if settings["DEPLOY_TO"] == "minikube":
        init_env_command = "eval $(minikube docker-env); "

    run(init_env_command + " ".join(["docker build"] + custom_build_args + [
        "--no-cache" if settings["BUILD_DOCKER_IMAGE"] else "",
        "-t %(DOCKER_IMAGE_PREFIX)s/%(COMPONENT_LABEL)s",
        "deploy/docker/%(COMPONENT_LABEL)s/",
    ]) % settings,
        verbose=True)

    run(init_env_command + " ".join([
        "docker tag",
        "%(DOCKER_IMAGE_PREFIX)s/%(COMPONENT_LABEL)s",
        "%(DOCKER_IMAGE_PREFIX)s/%(COMPONENT_LABEL)s:%(TIMESTAMP)s",
    ]) % settings)

    if settings.get("DEPLOY_TO_PREFIX") == "gcloud":
        run("gcloud docker -- push %(DOCKER_IMAGE_PREFIX)s/%(COMPONENT_LABEL)s:%(TIMESTAMP)s"
            % settings,
            verbose=True)
    def run_hail(self, script_path, *script_args):
        """Submits the hail script to dataproc.  Assumes cluster has already been created.

        Args:
            script_path (string):
            script_args (list): arguments to pass to the script
        """

        cluster_id = self.cluster_id

        #hail_hash = run_shell_command(
        #    "gsutil cat gs://hail-common/latest-hash.txt")
        #hail_hash = hail_hash.strip()
        #hail_zip = "gs://hail-common/pyhail-hail-is-master-%(hail_hash)s.zip" % locals()
        #hail_jar = "gs://hail-common/hail-hail-is-master-all-spark2.0.2-%(hail_hash)s.jar" % locals()

        hail_zip = "gs://gnomad-bw2/hail-jar/hail-python.zip"
        hail_jar = "gs://gnomad-bw2/hail-jar/hail-all-spark.jar"
        hail_jar_filename = os.path.basename(hail_jar)

        with tempfile.NamedTemporaryFile("w", suffix=".zip") as utils_zip:
            utils_zip_file_path = utils_zip.name
            with zipfile.ZipFile(utils_zip_file_path, "w") as utils_zip_file:
                for utils_script in glob.glob(
                        os.path.join(BASE_DIR,
                                     "seqr/pipelines/hail/utils/*.py")):
                    utils_zip_file.write(
                        utils_script,
                        "utils/" + os.path.basename(utils_script))

            script_args_string = " ".join(script_args)
            run(" ".join([
                "gcloud dataproc jobs submit pyspark", "--project",
                GCLOUD_PROJECT, "--cluster", cluster_id, "--files", hail_jar,
                "--py-files %(hail_zip)s,%(utils_zip_file_path)s",
                "--properties=spark.files=./%(hail_jar_filename)s,spark.driver.extraClassPath=./%(hail_jar_filename)s,spark.executor.extraClassPath=./%(hail_jar_filename)s",
                "%(script_path)s -- %(script_args_string)s"
            ]) % locals()).wait()
def set_environment(deployment_target):
    """Configure the shell environment to point to the given deployment_target using 'gcloud config set-context' and other commands.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """

    settings = collections.OrderedDict()
    load_settings([
        "deploy/kubernetes/shared-settings.yaml",
        "deploy/kubernetes/%(deployment_target)s-settings.yaml" % locals(),
        ], settings)

    if deployment_target.startswith("gcloud"):
        os.environ["KUBECONFIG"] = os.path.expanduser("~/.kube/config")
        run("gcloud config set core/project %(GCLOUD_PROJECT)s" % settings, print_command=True)
        run("gcloud config set compute/zone %(GCLOUD_ZONE)s" % settings, print_command=True)
        run("gcloud container clusters get-credentials --zone=%(GCLOUD_ZONE)s %(CLUSTER_NAME)s" % settings, print_command=True)
    elif deployment_target == "minikube":
        run("kubectl config use-context minikube", print_command=True)
    else:
        raise ValueError("Unexpected deployment_target value: %s" % (deployment_target,))

    run("kubectl config set-context $(kubectl config current-context) --namespace=%(NAMESPACE)s" % settings)
Example #36
def deploy_nginx(settings):
    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    print_separator("nginx")

    run("kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml" % locals())

    if settings["DELETE_BEFORE_DEPLOY"]:
        run("kubectl delete -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/nginx/nginx.yaml" % settings, errors_to_ignore=["not found"])
    run("kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/nginx/nginx.yaml" % settings)
Example #38
def deploy_elasticsearch_sharded(settings, component):
    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    print_separator(component)

    if component == "es-master":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-discovery-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-master.yaml",
        ]
    elif component == "es-client":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-client.yaml",
        ]
    elif component == "es-data":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-data-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-data-stateful.yaml",
        ]
    elif component == "es-kibana":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-kibana.yaml",
        ]
    elif component == "kibana":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/kibana/kibana.%(DEPLOY_TO_PREFIX)s.yaml",
        ]
    else:
        raise ValueError("Unexpected component: " + component)

    if settings["DELETE_BEFORE_DEPLOY"]:
        for config_file in config_files:
            run("kubectl delete -f " + config_file % settings,
                errors_to_ignore=["not found"])

    for config_file in config_files:
        run("kubectl apply -f " + config_file % settings)

    if component in ["es-client", "es-master", "es-data", "es-kibana"]:
        # wait until all replicas are running
        num_pods = int(
            settings.get(component.replace("-", "_").upper() + "_NUM_PODS", 1))
        for pod_number_i in range(num_pods):
            sleep_until_pod_is_running(component,
                                       deployment_target=settings["DEPLOY_TO"],
                                       pod_number=pod_number_i)

    if component == "es-client":
        run("kubectl describe svc elasticsearch")
Example #39
def deploy_config_map(settings):
    # write out a ConfigMap file
    configmap_file_path = os.path.join(settings["DEPLOYMENT_TEMP_DIR"], "deploy/kubernetes/all-settings.properties")
    with open(configmap_file_path, "w") as f:
        for key, value in settings.items():
            if value is None:
                continue

            f.write('%s=%s\n' % (key, value))

    # deploy ConfigMap file so that settings key/values can be added as environment variables in each of the pods
    run("kubectl delete configmap all-settings", errors_to_ignore=["not found"])
    run("kubectl create configmap all-settings --from-file=%(configmap_file_path)s" % locals())
    run("kubectl get configmaps all-settings -o yaml")
def deploy_elasticsearch_sharded(settings, component):
    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    print_separator(component)

    if component == "es-master":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-discovery-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-master.yaml",
        ]
    elif component == "es-client":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-client.yaml",
        ]
    elif component == "es-data":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-data-svc.yaml",
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-data-stateful.yaml",
        ]
    elif component == "es-kibana":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/hail_elasticsearch_pipelines/kubernetes/elasticsearch-sharded/es-kibana.yaml",
        ]
    elif component == "kibana":
        config_files = [
            "%(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/kibana/kibana.%(DEPLOY_TO_PREFIX)s.yaml",
        ]
    else:
        raise ValueError("Unexpected component: " + component)

    if settings["DELETE_BEFORE_DEPLOY"]:
        for config_file in config_files:
            run("kubectl delete -f " + config_file % settings, errors_to_ignore=["not found"])

    for config_file in config_files:
        run("kubectl apply -f " + config_file % settings)

    if component in ["es-client", "es-master", "es-data", "es-kibana"]:
        # wait until all replicas are running
        num_pods = int(settings.get(component.replace("-", "_").upper()+"_NUM_PODS", 1))
        for pod_number_i in range(num_pods):
            sleep_until_pod_is_running(component, deployment_target=settings["DEPLOY_TO"], pod_number=pod_number_i)

    if component == "es-client":
       run("kubectl describe svc elasticsearch")
Example #41
def deploy_init_cluster(settings):
    """Provisions a GKE cluster, persistent disks, and any other prerequisites for deployment."""

    print_separator("init-cluster")

    # initialize the VM
    if settings["DEPLOY_TO"] == "minikube":
        _init_cluster_minikube(settings)
    elif settings["DEPLOY_TO_PREFIX"] == "gcloud":
        _init_cluster_gcloud(settings)
    else:
        raise ValueError("Unexpected DEPLOY_TO_PREFIX: %(DEPLOY_TO_PREFIX)s" %
                         settings)

    node_name = get_node_name()
    if not node_name:
        raise Exception(
            "Unable to retrieve node name. Was the cluster created successfully?"
        )

    set_environment(settings["DEPLOY_TO"])

    create_namespace(settings)

    # create priority classes - " Priority affects scheduling order of Pods and out-of-resource eviction ordering
    # on the Node.... A PriorityClass is a non-namespaced object .. The higher the value, the higher the priority."
    # (from https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass)
    run("kubectl create priorityclass medium-priority --value=1000" % settings,
        errors_to_ignore=["already exists"])
    run("kubectl create priorityclass high-priority --value=10000" % settings,
        errors_to_ignore=["already exists"])

    # print cluster info
    run("kubectl cluster-info", verbose=True)

    # wait for the cluster to initialize
    for retry_i in range(1, 5):
        try:
            deploy_settings(settings)
            break
        except RuntimeError as e:
            logger.error((
                "Error when deploying config maps: %(e)s. This sometimes happens when cluster is "
                "initializing. Retrying...") % locals())
            time.sleep(5)
def deploy_settings(settings):
    """Deploy settings as a config map"""
    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    # write out a ConfigMap file
    configmap_file_path = os.path.join(settings["DEPLOYMENT_TEMP_DIR"], "deploy/kubernetes/all-settings.properties")
    with open(configmap_file_path, "w") as f:
        for key, value in settings.items():
            if value is None:
                continue

            f.write('%s=%s\n' % (key, value))

    create_namespace(settings)

    run("kubectl delete configmap all-settings", errors_to_ignore=["not found"])
    run("kubectl create configmap all-settings --from-file=%(configmap_file_path)s" % locals())
    run("kubectl get configmaps all-settings -o yaml")
def deploy_cockpit(settings):
    print_separator("cockpit")

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("cockpit", settings, custom_yaml_filename="cockpit.yaml")
        #"kubectl delete -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/cockpit/cockpit.yaml" % settings,

    if settings["DEPLOY_TO"] == "minikube":
        # disable username/password prompt - https://github.com/cockpit-project/cockpit/pull/6921
        run(" ".join([
            "kubectl create clusterrolebinding anon-cluster-admin-binding",
            "--clusterrole=cluster-admin",
            "--user=system:anonymous",
        ]), errors_to_ignore=["already exists"])

    run("kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/cockpit/cockpit.yaml" % settings)

    # print username, password for logging into cockpit
    run("kubectl config view")
def create_vpc(gcloud_project, network_name):
    run(" ".join([
        #"gcloud compute networks create seqr-project-custom-vpc --project=%(GCLOUD_PROJECT)s --mode=custom"
        "gcloud compute networks create %(network_name)s",
        "--project=%(gcloud_project)s",
        "--subnet-mode=auto"
    ]) % locals(), errors_to_ignore=["already exists"])

    # add recommended firewall rules to enable ssh, etc.
    run(" ".join([
        "gcloud compute firewall-rules create custom-vpc-allow-tcp-udp-icmp",
        "--project %(gcloud_project)s",
        "--network %(network_name)s",
        "--allow tcp,udp,icmp",
        "--source-ranges 10.0.0.0/8",
    ]) % locals(), errors_to_ignore=["already exists"])

    run(" ".join([
        "gcloud compute firewall-rules create custom-vpc-allow-ports",
        "--project %(gcloud_project)s",
        "--network %(network_name)s",
        "--allow tcp:22,tcp:3389,icmp",
        "--source-ranges 10.0.0.0/8",
    ]) % locals(), errors_to_ignore=["already exists"])
def create_namespace(settings):
    run("kubectl create -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/namespace.yaml" % settings, errors_to_ignore=["already exists"])

    # switch kubectl to use the new namespace
    run("kubectl config set-context $(kubectl config current-context) --namespace=%(NAMESPACE)s" % settings)
    if wait_until_pod_is_running:
        sleep_until_pod_is_running(component_label, deployment_target=settings["DEPLOY_TO"])

    if wait_until_pod_is_ready:
        sleep_until_pod_is_ready(component_label, deployment_target=settings["DEPLOY_TO"])


def delete_pod(component_label, settings, is_async=False, custom_yaml_filename=None):
    deployment_target = settings["DEPLOY_TO"]

    yaml_filename = custom_yaml_filename or (component_label+".%(DEPLOY_TO_PREFIX)s.yaml")

    if is_pod_running(component_label, deployment_target):
        run(" ".join([
            "kubectl delete",
            "-f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/"+component_label+"/"+yaml_filename,
            ]) % settings, errors_to_ignore=["not found"])

    logger.info("waiting for \"%s\" to exit Running status" % component_label)
    while is_pod_running(component_label, deployment_target) and not is_async:
        time.sleep(5)


def deploy_secrets(settings):
    """Deploys or updates k8s secrets."""

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    print_separator("secrets")

    create_namespace(settings)

    # deploy secrets
    for secret_label in [
        "seqr-secrets",
        "postgres-secrets",
        "nginx-secrets",
        "matchbox-secrets",
        "gcloud-client-secrets"
    ]:
        run("kubectl delete secret %(secret_label)s" % locals(), verbose=False, errors_to_ignore=["not found"])

    run(" ".join([
        "kubectl create secret generic seqr-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/seqr/omim_key",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/seqr/postmark_server_token",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/seqr/mme_node_admin_token",
    ]) % settings, errors_to_ignore=["already exists"])

    run(" ".join([
        "kubectl create secret generic postgres-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/postgres/postgres.username",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/postgres/postgres.password",
    ]) % settings, errors_to_ignore=["already exists"])

    run(" ".join([
        "kubectl create secret generic nginx-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/nginx-%(DEPLOY_TO)s/tls.key",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/nginx-%(DEPLOY_TO)s/tls.crt",
    ]) % settings, errors_to_ignore=["already exists"])

    run(" ".join([
        "kubectl create secret generic matchbox-secrets",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/matchbox/nodes.json",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/matchbox/application.properties",
        "--from-file deploy/secrets/%(DEPLOY_TO_PREFIX)s/matchbox/config.xml",
    ]) % settings, errors_to_ignore=["already exists"])

    account_key_path = "deploy/secrets/%(DEPLOY_TO_PREFIX)s/gcloud-client/service-account-key.json" % settings
    if not os.path.isfile(account_key_path):
        account_key_path = "deploy/secrets/shared/gcloud/service-account-key.json"
    if os.path.isfile(account_key_path):
        run(" ".join([
            "kubectl create secret generic gcloud-client-secrets",
            "--from-file %(account_key_path)s",
            "--from-file deploy/secrets/shared/gcloud/boto",
        ]) % {'account_key_path': account_key_path}, errors_to_ignore=["already exists"])
    else:
        run(" ".join([
            "kubectl create secret generic gcloud-client-secrets"   # create an empty set of client secrets
        ]), errors_to_ignore=["already exists"])
def show_dashboard():
    """Opens the kubernetes dashboard in a new browser window."""

    p = run_in_background('kubectl proxy')
    run('open http://localhost:8001/ui')
    p.wait()
def deploy_phenotips(settings):
    print_separator("phenotips")

    phenotips_service_port = settings["PHENOTIPS_SERVICE_PORT"]
    restore_phenotips_db_from_backup = settings.get("RESTORE_PHENOTIPS_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]

    if reset_db or restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)
        run_in_pod("postgres", "psql -U postgres postgres -c 'drop database xwiki'" % locals(),
           verbose=True,
            errors_to_ignore=["does not exist"],
            deployment_target=deployment_target,
        )
    elif settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("phenotips", settings)

    # init postgres
    if not settings["ONLY_PUSH_TO_REGISTRY"]:
        run_in_pod("postgres",
            "psql -U postgres postgres -c \"create role xwiki with CREATEDB LOGIN PASSWORD 'xwiki'\"" % locals(),
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod("postgres",
            "psql -U xwiki postgres -c 'create database xwiki'" % locals(),
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod("postgres",
            "psql -U postgres postgres -c 'grant all privileges on database xwiki to xwiki'" % locals(),
        )

    # build container
    docker_build("phenotips", settings, ["--build-arg PHENOTIPS_SERVICE_PORT=%s" % phenotips_service_port])

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)

    for i in range(0, 3):
        # opening the PhenoTips website for the 1st time triggers a final set of initialization
        # steps which take ~ 1 minute, so run wget to trigger this

        try:
            run_in_pod("phenotips",
                #command="wget http://localhost:%(phenotips_service_port)s -O test.html" % locals(),
                command="curl --verbose -L -u Admin:admin http://localhost:%(phenotips_service_port)s -o test.html" % locals(),
                verbose=True
            )
        except Exception as e:
            logger.error(str(e))

        if i < 2:
            logger.info("Waiting for phenotips to start up...")
            time.sleep(10)

    if restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)

        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

        run("kubectl cp '%(restore_phenotips_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod("postgres", "/root/restore_database_backup.sh  xwiki  xwiki  /root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), deployment_target=deployment_target, verbose=True)
        run_in_pod("postgres", "rm /root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), deployment_target=deployment_target, verbose=True)

        deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)
def show_status():
    """Print status of various docker and kubernetes subsystems"""

    #run("docker info")
    #run("docker images")
    run("kubectl cluster-info", ignore_all_errors=True)
    #run("kubectl config view | grep 'username\|password'", ignore_all_errors=True)

    logger.info("==> Node IPs - for connecting to Kibana and elasticsearch via NodePorts 30002 and 30001:")
    run("kubectl describe nodes  | grep 'Name:\|ExternalIP'", ignore_all_errors=True)
    logger.info("==> elasticearch client IPs that hail can export to:")
    run("kubectl describe svc elasticsearch  | grep 'Name:\|Endpoints'", ignore_all_errors=True)

    run("kubectl get nodes", ignore_all_errors=True)
    run("kubectl get deployments --all-namespaces", ignore_all_errors=True)
    run("kubectl get services --all-namespaces", ignore_all_errors=True)
    run("kubectl get pods --all-namespaces", ignore_all_errors=True)
    run("kubectl config current-context", ignore_all_errors=True)
def delete_component(component, deployment_target=None):
    """Runs kubectl commands to delete any running deployment, service, or pod objects for the given component(s).

    Args:
        component (string): component to delete (eg. 'phenotips' or 'nginx').
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    if component == "cockpit":
        run("kubectl delete rc cockpit", errors_to_ignore=["not found"])
    elif component == "es-data":
        run("kubectl delete StatefulSet es-data", errors_to_ignore=["not found"])
    elif component == "nginx":
        run("kubectl delete -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml")

    run("kubectl delete deployments %(component)s" % locals(), errors_to_ignore=["not found"])
    run("kubectl delete services %(component)s" % locals(), errors_to_ignore=["not found"])

    pod_name = get_pod_name(component, deployment_target=deployment_target)
    if pod_name:
        run("kubectl delete pods %(pod_name)s" % locals(), errors_to_ignore=["not found"])

        logger.info("waiting for \"%s\" to exit Running status" % component)
        while is_pod_running(component, deployment_target):
            time.sleep(5)


    # print services and pods status
    run("kubectl get services" % locals(), verbose=True)
    run("kubectl get pods" % locals(), verbose=True)
def _init_cluster_minikube(settings):
    """Checks that minikube is running. If not, either starts and configures minikube, or prints instructions on how
    to do this.
    """
    try:
        # check that minikube is running
        status = run("minikube status")
    except Exception as e:
        logger.info("minikube status: %s" % str(e))

        if "MINIKUBE_MEMORY" not in settings:
            settings["MINIKUBE_MEMORY"] = str((psutil.virtual_memory().total - 4*10**9) / 10**6)  # leave 4Gb overhead
        if "MINIKUBE_NUM_CPUS" not in settings:
            settings["MINIKUBE_NUM_CPUS"] = multiprocessing.cpu_count()  # use all CPUs on machine

        minikube_start_command = (
                                     "minikube start "
                                     "--kubernetes-version=v1.11.3 "
                                     "--disk-size=%(MINIKUBE_DISK_SIZE)s "
                                     "--memory=%(MINIKUBE_MEMORY)s "
                                     "--cpus=%(MINIKUBE_NUM_CPUS)s "
                                 ) % settings

        if sys.platform.startswith('darwin'):
            # MacOSx

            # double-check that there's no minikube instance running
            run("minikube stop", ignore_all_errors=True)
            # run("minikube delete", ignore_all_errors=True)

            # haven't switched to hyperkit yet because it still has issues like https://bunnyyiu.github.io/2018-07-16-minikube-reboot/
            minikube_start_command += " --vm-driver=xhyve "
            # minikube_start_command +=  " --mount-string %(LOCAL_DATA_DIR)s:%(DATA_DIR)s --mount "

            # start minikube
            logger.info("starting minikube: ")
            run(minikube_start_command)
        else:
            if sys.platform.startswith('linux'):
                minikube_start_command += " --vm-driver=none "
            else:
                minikube_start_command += " --vm-driver=virtualbox "

            logger.info("Please run '%s' and then check that 'minikube status' shows minikube is running" % minikube_start_command)
            sys.exit(0)  # terminate installation of other components also since minikube isn't running

    # configure docker command
    run("gcloud auth configure-docker --quiet")

    # this fixes time sync issues on MacOSX which could interfere with token auth (https://github.com/kubernetes/minikube/issues/1378)
    run("minikube ssh -- docker run -i --rm --privileged --pid=host debian nsenter -t 1 -m -u -n -i date -u $(date -u +%m%d%H%M%Y)")

    # set VM max_map_count to the value required for elasticsearch
    run("minikube ssh 'sudo /sbin/sysctl -w vm.max_map_count=262144'")
def _init_cluster_gcloud(settings):
    """Starts and configures a kubernetes cluster on Google Container Engine based on parameters in settings"""

    run("gcloud config set project %(GCLOUD_PROJECT)s" % settings)

    # create private network so that dataproc jobs can connect to GKE cluster nodes
    # based on: https://medium.com/@DazWilkin/gkes-cluster-ipv4-cidr-flag-69d25884a558
    create_vpc(gcloud_project="%(GCLOUD_PROJECT)s" % settings, network_name="%(GCLOUD_PROJECT)s-auto-vpc" % settings)

    # create cluster
    run(" ".join([
        "gcloud beta container clusters create %(CLUSTER_NAME)s",
        "--enable-autorepair",
        "--enable-stackdriver-kubernetes",
        "--cluster-version %(KUBERNETES_VERSION)s",  # to get available versions, run: gcloud container get-server-config
        "--project %(GCLOUD_PROJECT)s",
        "--zone %(GCLOUD_ZONE)s",
        "--machine-type %(CLUSTER_MACHINE_TYPE)s",
        "--num-nodes 1",
        #"--network %(GCLOUD_PROJECT)s-auto-vpc",
        #"--local-ssd-count 1",
        "--scopes", "https://www.googleapis.com/auth/devstorage.read_write",
    ]) % settings, verbose=False, errors_to_ignore=["Already exists"])

    # create cluster nodes - breaking them up into node pools of several machines each.
    # This way, the cluster can be scaled up and down when needed using the technique in
    #    https://github.com/mattsolo1/gnomadjs/blob/master/cluster/elasticsearch/Makefile#L23
    #
    i = 0
    num_nodes_remaining_to_create = int(settings["CLUSTER_NUM_NODES"]) - 1
    num_nodes_per_node_pool = int(settings["NUM_NODES_PER_NODE_POOL"])
    while num_nodes_remaining_to_create > 0:
        i += 1
        run(" ".join([
            "gcloud container node-pools create %(CLUSTER_NAME)s-"+str(i),
            "--cluster %(CLUSTER_NAME)s",
            "--project %(GCLOUD_PROJECT)s",
            "--zone %(GCLOUD_ZONE)s",
            "--machine-type %(CLUSTER_MACHINE_TYPE)s",
            "--num-nodes %s" % min(num_nodes_per_node_pool, num_nodes_remaining_to_create),
            #"--network %(GCLOUD_PROJECT)s-auto-vpc",
            #"--local-ssd-count 1",
            "--scopes", "https://www.googleapis.com/auth/devstorage.read_write"
        ]) % settings, verbose=False, errors_to_ignore=["lready exists"])  # "lready" matches both "Already exists" and "already exists"

        num_nodes_remaining_to_create -= num_nodes_per_node_pool
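    # Worked example: with CLUSTER_NUM_NODES=7 and NUM_NODES_PER_NODE_POOL=3 (illustrative
    # values), the cluster is created above with 1 node, leaving 6 to allocate here; the loop
    # then creates <CLUSTER_NAME>-1 with 3 nodes and <CLUSTER_NAME>-2 with 3 nodes, 7 in total.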

    run(" ".join([
        "gcloud container clusters get-credentials %(CLUSTER_NAME)s",
        "--project %(GCLOUD_PROJECT)s",
        "--zone %(GCLOUD_ZONE)s",
    ]) % settings)

    # create disks from snapshots
    created_disks = []
    for i, snapshot_name in enumerate([d.strip() for d in settings.get("CREATE_FROM_SNAPSHOTS", "").split(",") if d]):
        disk_name = "es-data-%d" % (i+1)
        run(" ".join([
            "gcloud compute disks create " + disk_name,
            "--zone %(GCLOUD_ZONE)s",
            "--type pd-ssd",
            "--source-snapshot " + snapshot_name,
        ]) % settings, errors_to_ignore=["lready exists"])  # matches "Already exists" / "already exists"

        created_disks.append(disk_name)

    if created_disks:
        settings["CREATE_WITH_EXISTING_DISKS"] = ",".join(created_disks)

    # create PersistentVolume objects for disk
    namespace = settings["NAMESPACE"]
    for i, existing_disk_name in enumerate([d.strip() for d in settings.get("CREATE_WITH_EXISTING_DISKS", "").split(",") if d]):
        elasticsearch_disk_size = settings["ELASTICSEARCH_DISK_SIZE"]
        with tempfile.NamedTemporaryFile("w") as f:
            f.write("""apiVersion: v1
kind: PersistentVolume
metadata:
  name: %(existing_disk_name)s
  namespace: %(namespace)s
spec:
  capacity:
    storage: %(elasticsearch_disk_size)s
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: ssd-storage-class
  gcePersistentDisk:
    fsType: ext4
    pdName: %(existing_disk_name)s
""" % locals())
            f.flush()
            file_path = f.name
            run("kubectl create -f %(file_path)s"  % locals(), print_command=True, errors_to_ignore=["already exists"])

    # create elasticsearch disks storage class
    #run(" ".join([
    #    "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/ssd-storage-class.yaml" % settings,
    #]))
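    # For reference, an ssd-storage-class.yaml like the one referenced above would presumably
    # look something like this (kubernetes.io/gce-pd with type pd-ssd is the standard GCE
    # persistent-disk provisioner; the class name matches the storageClassName set in the
    # PersistentVolume template above):
    #
    #   apiVersion: storage.k8s.io/v1
    #   kind: StorageClass
    #   metadata:
    #     name: ssd-storage-class
    #   provisioner: kubernetes.io/gce-pd
    #   parameters:
    #     type: pd-ssd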

    #run(" ".join([
    #    "gcloud compute disks create %(CLUSTER_NAME)s-elasticsearch-disk-0  --type=pd-ssd --zone=us-central1-b --size=%(ELASTICSEARCH_DISK_SIZE)sGi" % settings,
    #]), errors_to_ignore=["already exists"])

    #run(" ".join([
    #    "kubectl apply -f %(DEPLOYMENT_TEMP_DIR)s/deploy/kubernetes/elasticsearch/es-persistent-volume.yaml" % settings,
    #]))


    # if cluster was already created previously, update its size to match CLUSTER_NUM_NODES
    #run(" ".join([
    #    "gcloud container clusters resize %(CLUSTER_NAME)s --size %(CLUSTER_NUM_NODES)s" % settings,
    #]), is_interactive=True)

    # create persistent disks
    for label in ("postgres", "seqr-static-files"): # "mongo"): # , "elasticsearch-sharded"):  # "elasticsearch"
        run(" ".join([
            "gcloud compute disks create",
            "--zone %(GCLOUD_ZONE)s",
            "--size %("+label.upper().replace("-", "_")+"_DISK_SIZE)s",
            "%(CLUSTER_NAME)s-"+label+"-disk",
        ]) % settings, verbose=True, errors_to_ignore=["already exists"])
def copy_local_file(dataset_file_path, dest_file_path):
    if not os.path.exists(dataset_file_path):
        raise ValueError("%(dataset_file_path)s not found" % locals())

    run("cp -r %(dataset_file_path)s %(dest_file_path)s" % locals())
def delete_all(deployment_target):
    """Runs kubectl and gcloud commands to delete the given cluster and all objects in it.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.

    """
    settings = {}

    load_settings([
        "deploy/kubernetes/shared-settings.yaml",
        "deploy/kubernetes/%(deployment_target)s-settings.yaml" % locals(),
    ], settings)

    if settings.get("DEPLOY_TO_PREFIX") == "gcloud":
        run("gcloud container clusters delete --project %(GCLOUD_PROJECT)s --zone %(GCLOUD_ZONE)s --no-async %(CLUSTER_NAME)s" % settings, is_interactive=True)

        run("gcloud compute disks delete --zone %(GCLOUD_ZONE)s %(CLUSTER_NAME)s-postgres-disk" % settings, is_interactive=True)
        #run("gcloud compute disks delete --zone %(GCLOUD_ZONE)s %(CLUSTER_NAME)s-mongo-disk" % settings, is_interactive=True)
        #run("gcloud compute disks delete --zone %(GCLOUD_ZONE)s %(CLUSTER_NAME)s-elasticsearch-disk" % settings, is_interactive=True)
    else:
        run('kubectl delete deployments --all')
        run('kubectl delete replicationcontrollers --all')
        run('kubectl delete services --all')
        run('kubectl delete StatefulSets --all')
        run('kubectl delete pods --all')

        run('docker kill $(docker ps -q)', errors_to_ignore=["requires at least 1 arg"])
        run('docker rmi -f $(docker images -q)', errors_to_ignore=["requires at least 1 arg"])