def reset_database(database=[], deployment_target=None):
    """Runs kubectl commands to delete and reset the given database(s).

    Args:
        database (list): one or more database labels - "seqrdb", "phenotipsdb", "mongodb"
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    if "seqrdb" in database:
        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)
        if not postgres_pod_name:
            logger.error("postgres pod must be running")
        else:
            run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'", errors_to_ignore=["does not exist"])
            run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'")

    if "phenotipsdb" in database:
        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)
        if not postgres_pod_name:
            logger.error("postgres pod must be running")
        else:
            run_in_pod(postgres_pod_name, "psql -U xwiki postgres -c 'drop database xwiki'", errors_to_ignore=["does not exist"])
            run_in_pod(postgres_pod_name, "psql -U xwiki postgres -c 'create database xwiki'")
            #run("kubectl exec %(postgres_pod_name)s -- psql -U postgres xwiki < data/init_phenotipsdb.sql" % locals())

    if "mongodb" in database:
        mongo_pod_name = get_pod_name("mongo", deployment_target=deployment_target)
        if not mongo_pod_name:
            logger.error("mongo pod must be running")
        else:
            run_in_pod(mongo_pod_name, "mongo datastore --eval 'db.dropDatabase()'")

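# Example usage (a minimal sketch with hypothetical values; typically invoked from the
# deployment command-line wrapper in this repo):
#
#   reset_database(database=["seqrdb", "phenotipsdb"], deployment_target="minikube")
#
# This drops and re-creates the seqr and PhenoTips postgres databases inside the running
# "postgres" pod for the given deployment target.
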
def deploy_seqr(settings):
    print_separator("seqr")

    if settings["BUILD_DOCKER_IMAGES"]:
        seqr_git_hash = run("git log -1 --pretty=%h", errors_to_ignore=["Not a git repository"])
        seqr_git_hash = (":" + seqr_git_hash.strip()) if seqr_git_hash is not None else ""

        docker_build("seqr", settings, [
            "--build-arg SEQR_SERVICE_PORT=%s" % settings["SEQR_SERVICE_PORT"],
            "--build-arg SEQR_UI_DEV_PORT=%s" % settings["SEQR_UI_DEV_PORT"],
            "-f deploy/docker/seqr/Dockerfile",
            "-t %(DOCKER_IMAGE_NAME)s" + seqr_git_hash,
        ])

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    restore_seqr_db_from_backup = settings.get("RESTORE_SEQR_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]
    postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("seqr", settings)
    elif reset_db or restore_seqr_db_from_backup:
        seqr_pod_name = get_pod_name('seqr', deployment_target=deployment_target)
        if seqr_pod_name:
            sleep_until_pod_is_running("seqr", deployment_target=deployment_target)
            run_in_pod(seqr_pod_name, "/usr/local/bin/stop_server.sh", verbose=True)

    if reset_db:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'",
                   errors_to_ignore=["does not exist"], verbose=True)

    if restore_seqr_db_from_backup:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'drop database seqrdb'",
                   errors_to_ignore=["does not exist"], verbose=True)
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'", verbose=True)
        run("kubectl cp '%(restore_seqr_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "/root/restore_database_backup.sh postgres seqrdb /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod(postgres_pod_name, "rm /root/$(basename %(restore_seqr_db_from_backup)s)" % locals(), verbose=True)
    else:
        run_in_pod(postgres_pod_name, "psql -U postgres postgres -c 'create database seqrdb'",
                   errors_to_ignore=["already exists"], verbose=True)

    deploy_pod("seqr", settings, wait_until_pod_is_ready=True)

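# The "settings" dict passed to deploy_seqr() must contain at least the keys read above.
# A minimal sketch (values are hypothetical; real values come from the deployment config
# files used elsewhere in this repo):
#
#   example_settings = {
#       "BUILD_DOCKER_IMAGES": True,
#       "SEQR_SERVICE_PORT": 8000,
#       "SEQR_UI_DEV_PORT": 3000,
#       "DOCKER_IMAGE_NAME": "gcr.io/my-project/seqr",   # hypothetical image name
#       "ONLY_PUSH_TO_REGISTRY": False,
#       "RESTORE_SEQR_DB_FROM_BACKUP": None,
#       "RESET_DB": False,
#       "DEPLOY_TO": "minikube",
#       "DELETE_BEFORE_DEPLOY": False,
#   }
#   deploy_seqr(example_settings)
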
def delete_component(component, deployment_target=None):
    """Runs kubectl commands to delete any running deployment, service, or pod objects for the given component.

    Args:
        component (string): component to delete (eg. 'phenotips' or 'nginx').
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    if component == "cockpit":
        run("kubectl delete rc cockpit", errors_to_ignore=["not found"])
    elif component == "es-data":
        run("kubectl delete StatefulSet es-data", errors_to_ignore=["not found"])
    elif component == "nginx":
        run("kubectl delete -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml")

    run("kubectl delete deployments %(component)s" % locals(), errors_to_ignore=["not found"])
    run("kubectl delete services %(component)s" % locals(), errors_to_ignore=["not found"])

    pod_name = get_pod_name(component, deployment_target=deployment_target)
    if pod_name:
        run("kubectl delete pods %(pod_name)s" % locals(), errors_to_ignore=["not found"])

        logger.info("waiting for \"%s\" to exit Running status" % component)
        while is_pod_running(component, deployment_target):
            time.sleep(5)

    # print services and pods status
    run("kubectl get services", verbose=True)
    run("kubectl get pods", verbose=True)

def update_reference_data(deployment_target):
    """DEPRECATED. Load reference data into mongodb.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    check_kubernetes_context(deployment_target)

    pod_name = get_pod_name('seqr', deployment_target=deployment_target)
    if not pod_name:
        raise ValueError("No 'seqr' pods found. Is the kubectl environment configured in this terminal? and has this pod been deployed?")

    # commented out because this is not loaded from settings backup
    #run_in_pod(pod_name, "python2.7 -u manage.py update_all_reference_data --omim-key '$OMIM_KEY'" % locals(), verbose=True, print_command=True)

    run_in_pod(pod_name, "mkdir -p /seqr/data/reference_data")
    run_in_pod(pod_name, "wget -N https://storage.googleapis.com/seqr-reference-data/seqr-resource-bundle.tar.gz -O /seqr/data/reference_data/seqr-resource-bundle.tar.gz")
    run_in_pod(pod_name, "tar xzf /seqr/data/reference_data/seqr-resource-bundle.tar.gz -C /seqr/data/reference_data", verbose=True)
    run_in_pod(pod_name, "rm /seqr/data/reference_data/seqr-resource-bundle.tar.gz")

    # load legacy resources
    run_in_pod(pod_name, "python -u manage.py load_resources", verbose=True)
    run_in_pod(pod_name, "python -u manage.py load_omim", verbose=True)

def copy_files_to_or_from_pod(component, deployment_target, source_path, dest_path, direction=1):
    """Copy file(s) to or from the given component.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        source_path (string): source file path. If copying files to the component, it should be a local path.
            Otherwise, it should be a file path inside the component pod.
        dest_path (string): destination file path. If copying files from the component, it should be a local path.
            Otherwise, it should be a file path inside the component pod.
        direction (int): If > 0 the file will be copied to the pod. If < 0, then it will be copied from the pod.
    """
    full_pod_name = get_pod_name(component, deployment_target=deployment_target)
    if not full_pod_name:
        raise ValueError(
            "No '%(component)s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?" % locals())

    if direction < 0:    # copy from pod
        source_path = "%s:%s" % (full_pod_name, source_path)
    elif direction > 0:  # copy to pod
        dest_path = "%s:%s" % (full_pod_name, dest_path)

    run("kubectl cp '%(source_path)s' '%(dest_path)s'" % locals())

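# Example usage (hypothetical paths; a minimal sketch):
#
#   # copy a local backup file into the postgres pod
#   copy_files_to_or_from_pod("postgres", "minikube", "./seqrdb_backup.sql", "/root/seqrdb_backup.sql", direction=1)
#
#   # copy a file out of the postgres pod to the local working directory
#   copy_files_to_or_from_pod("postgres", "minikube", "/root/seqrdb_backup.sql", "./seqrdb_backup.sql", direction=-1)
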
def port_forward(component_port_pairs=[], deployment_target=None, wait=True, open_browser=False, use_kubectl_proxy=False):
    """Executes kubectl command to set up port forwarding between localhost and the given pod.

    While this is running, connecting to localhost:<port> will be the same as connecting to that port
    from the pod's internal network.

    Args:
        component_port_pairs (list): 2-tuple(s) containing keyword to use for looking up a kubernetes
            pod, along with the port to forward to that pod (eg. ('phenotips', 8080))
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "gcloud-dev"
        wait (bool): Whether to block indefinitely as long as the forwarding process is running.
        open_browser (bool): If component_port_pairs includes components that have an http server
            (eg. "seqr" or "phenotips"), then open a web browser window to the forwarded port.
        use_kubectl_proxy (bool): Whether to use kubectl proxy instead of kubectl port-forward
            (see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#manually-constructing-apiserver-proxy-urls)
    Returns:
        (list): Popen process objects for the kubectl port-forward processes.
    """
    procs = []
    for component_label, port in component_port_pairs:
        if component_label == "kube-scan":
            continue  # See https://github.com/octarinesec/kube-scan for how to connect to the kube-scan pod.

        wait_until_pod_is_running(component_label, deployment_target)

        logger.info("Forwarding port %s for %s" % (port, component_label))
        pod_name = get_pod_name(component_label, deployment_target=deployment_target)

        if use_kubectl_proxy:
            command = "kubectl proxy --port 8001"
        else:
            command = "kubectl port-forward %(pod_name)s %(port)s" % locals()

        p = run_in_background(command)

        if open_browser and component_label in COMPONENTS_TO_OPEN_IN_BROWSER:
            if use_kubectl_proxy:
                url = "http://localhost:8001/api/v1/namespaces/default/services/%(component_label)s:%(port)s/proxy/" % locals()
            else:
                url = "http://localhost:%s" % port

            time.sleep(3)
            os.system("open " + url)

        procs.append(p)

    if wait:
        wait_for(procs)

    return procs

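# Example usage (a minimal sketch; the ports are hypothetical and should match the service
# ports configured for each component):
#
#   procs = port_forward([("seqr", 8000), ("phenotips", 8080)],
#                        deployment_target="minikube", wait=False, open_browser=True)
#   ...
#   wait_for(procs)   # block until the forwarding processes exit
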
def troubleshoot_component(component, deployment_target):
    """Runs kubectl command to print detailed debug output for the given component.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    pod_name = get_pod_name(component, deployment_target=deployment_target)

    run("kubectl get pods -o yaml %(pod_name)s" % locals(), verbose=True)

def load_dataset(deployment_target, project_name, genome_version, sample_type, dataset_type, vcf,
                 memory_to_use=None, cpu_limit=None, **kwargs):
    """Load dataset into elasticsearch."""

    pod_name = get_pod_name('pipeline-runner', deployment_target=deployment_target)

    # run load command
    additional_load_command_args = " ".join(
        "--%s '%s'" % (key.lower().replace("_", "-"), value)
        for key, value in kwargs.items() if value is not None)

    if deployment_target == "minikube":
        vcf_name = os.path.basename(vcf)
        path_in_pod = "/data/{}".format(vcf_name)

        if os.path.isfile(vcf):
            # if local file path, copy file into pod
            run("kubectl cp '%(vcf)s' '%(pod_name)s:%(path_in_pod)s'" % locals())
        elif vcf.startswith("http"):
            run_in_pod(pod_name, "wget -N %(vcf)s -O %(path_in_pod)s" % locals())
        elif vcf.startswith("gs:"):
            run_in_pod(pod_name, "gsutil cp -n %(vcf)s %(path_in_pod)s" % locals())

        vcf = path_in_pod

        total_memory = psutil.virtual_memory().total - 6*10**9  # leave 6Gb for other processes
        # divide available memory evenly between spark driver & executor
        memory_to_use = "%sG" % (total_memory / 2 / 10**9) if memory_to_use is None else memory_to_use
        cpu_limit = max(1, psutil.cpu_count() / 2) if cpu_limit is None else cpu_limit

        load_command = """/hail-elasticsearch-pipelines/run_hail_locally.sh \
            --driver-memory %(memory_to_use)s \
            --executor-memory %(memory_to_use)s \
            hail_scripts/v01/load_dataset_to_es.py \
            --cpu-limit %(cpu_limit)s \
            --genome-version %(genome_version)s \
            --project-guid %(project_name)s \
            --sample-type %(sample_type)s \
            --dataset-type %(dataset_type)s \
            --skip-validation \
            --exclude-hgmd \
            --vep-block-size 100 \
            --es-block-size 10 \
            --num-shards 1 \
            --max-samples-per-index 99 \
            %(additional_load_command_args)s \
            %(vcf)s
        """ % locals()
    else:
        load_command = """/hail-elasticsearch-pipelines/run_hail_on_dataproc.sh \
            hail_scripts/v01/load_dataset_to_es.py \
            --genome-version %(genome_version)s \
            --project-guid %(project_name)s \
            --sample-type %(sample_type)s \
            --dataset-type %(dataset_type)s \
            %(additional_load_command_args)s \
            %(vcf)s
        """ % locals()

    run_in_pod(pod_name, load_command, verbose=True)

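# Example usage (a minimal sketch; the project guid and VCF path are hypothetical):
#
#   load_dataset(
#       "minikube",
#       project_name="R0001_my_project",          # hypothetical project guid
#       genome_version="37",
#       sample_type="WES",
#       dataset_type="VARIANTS",
#       vcf="gs://my-bucket/my-callset.vcf.gz",   # local path, http(s) URL, or gs:// path
#   )
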
def print_log(components, deployment_target, enable_stream_log, previous=False, wait=True):
    """Executes kubernetes command to print logs for the given pod.

    Args:
        components (list): one or more kubernetes pod labels (eg. 'phenotips' or 'nginx').
            If more than one is specified, logs will be printed from all components in parallel.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "gcloud-dev", etc.
        enable_stream_log (bool): whether to continuously stream the log instead of just printing the log up to now.
        previous (bool): Prints logs from a previous instance of the container. This is useful for debugging pods
            that don't start or immediately enter crash-loop.
        wait (bool): If False, this method will return without waiting for the log streaming process
            to finish printing all logs.

    Returns:
        (list): Popen process objects for the kubectl logs processes.
    """
    stream_arg = "-f" if enable_stream_log else ""
    previous_flag = "--previous" if previous else ""

    procs = []
    for component_label in components:
        if component_label == "kube-scan":
            continue  # See https://github.com/octarinesec/kube-scan for how to connect to the kube-scan pod.

        if not previous:
            wait_until_pod_is_running(component_label, deployment_target)

        pod_name = get_pod_name(component_label, deployment_target=deployment_target)

        p = run_in_background("kubectl logs %(stream_arg)s %(previous_flag)s %(pod_name)s" % locals(), print_command=True)

        def print_command_log(p=p):  # bind the current process so the thread doesn't pick up a later iteration's p
            for line in iter(p.stdout.readline, ''):
                logger.info(line.strip('\n'))

        t = Thread(target=print_command_log)
        t.start()

        procs.append(p)

    if wait:
        wait_for(procs)

    return procs

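# Example usage (a minimal sketch): stream seqr and nginx logs in parallel until interrupted:
#
#   print_log(["seqr", "nginx"], deployment_target="gcloud-dev", enable_stream_log=True)
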
def load_example_project(deployment_target, genome_version="37", cpu_limit=None, start_with_step=None):
    """Load example project

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        genome_version (string): reference genome version - either "37" or "38"
    """
    project_name = "1kg"

    check_kubernetes_context(deployment_target)

    pod_name = get_pod_name('seqr', deployment_target=deployment_target)
    if not pod_name:
        raise ValueError("No 'seqr' pod found. Is the kubectl environment configured in this terminal? and has this pod been deployed?")

    run_in_pod(pod_name, "wget -N https://storage.googleapis.com/seqr-reference-data/test-projects/1kg.ped")
    #run_in_pod(pod_name, "gsutil cp %(ped)s ." % locals())

    # TODO call APIs instead?
    run_in_pod(pod_name, "python2.7 -u -m manage create_project -p '1kg.ped' '%(project_name)s'" % locals(), verbose=True)

    if genome_version == "37":
        vcf_filename = "1kg.vcf.gz"
    elif genome_version == "38":
        vcf_filename = "1kg.liftover.GRCh38.vep.vcf.gz"
    else:
        raise ValueError("Unexpected genome_version: %s" % (genome_version,))

    load_dataset(
        deployment_target,
        project_name=project_name,
        genome_version=genome_version,
        sample_type="WES",
        dataset_type="VARIANTS",
        cpu_limit=cpu_limit,
        start_with_step=start_with_step,
        vcf="https://storage.googleapis.com/seqr-reference-data/test-projects/%(vcf_filename)s" % locals())

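# Example usage (a minimal sketch): load the 1kg example project into a local minikube deployment:
#
#   load_example_project("minikube", genome_version="37")
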
def deploy_phenotips(settings):
    print_separator("phenotips")

    phenotips_service_port = settings["PHENOTIPS_SERVICE_PORT"]
    restore_phenotips_db_from_backup = settings.get("RESTORE_PHENOTIPS_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]

    if reset_db or restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)
        run_in_pod("postgres",
            "psql -U postgres postgres -c 'drop database xwiki'",
            verbose=True,
            errors_to_ignore=["does not exist"],
            deployment_target=deployment_target,
        )
    elif settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("phenotips", settings)

    # init postgres
    if not settings["ONLY_PUSH_TO_REGISTRY"]:
        run_in_pod("postgres",
            "psql -U postgres postgres -c \"create role xwiki with CREATEDB LOGIN PASSWORD 'xwiki'\"",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod("postgres",
            "psql -U xwiki postgres -c 'create database xwiki'",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod("postgres",
            "psql -U postgres postgres -c 'grant all privileges on database xwiki to xwiki'",
        )

    # build container
    docker_build("phenotips", settings, ["--build-arg PHENOTIPS_SERVICE_PORT=%s" % phenotips_service_port])

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)

    for i in range(0, 3):
        # opening the PhenoTips website for the 1st time triggers a final set of initialization
        # steps which take ~ 1 minute, so run curl to trigger this
        try:
            run_in_pod("phenotips",
                #command="wget http://localhost:%(phenotips_service_port)s -O test.html" % locals(),
                command="curl --verbose -L -u Admin:admin http://localhost:%(phenotips_service_port)s -o test.html" % locals(),
                verbose=True,
            )
        except Exception as e:
            logger.error(str(e))

        if i < 2:
            logger.info("Waiting for phenotips to start up...")
            time.sleep(10)

    if restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)

        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

        run("kubectl cp '%(restore_phenotips_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), verbose=True)
        run_in_pod("postgres", "/root/restore_database_backup.sh xwiki xwiki /root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), deployment_target=deployment_target, verbose=True)
        run_in_pod("postgres", "rm /root/$(basename %(restore_phenotips_db_from_backup)s)" % locals(), deployment_target=deployment_target, verbose=True)

        deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)
