def __init__(
    self,
    workflow,
    dag,
    cores,
    jobname="snakejob.{name}.{jobid}.sh",
    printreason=False,
    quiet=False,
    printshellcmds=False,
    cluster_config=None,
    local_input=None,
    restart_times=None,
    assume_shared_fs=False,
    max_status_checks_per_second=0.5,
    tes_url=None,
    container_image=None,
):
    # py-tes is an optional dependency; fail early with a clear message if it
    # is not installed.
    try:
        import tes
    except ImportError:
        raise WorkflowError(
            "Unable to import Python package tes. TES backend requires py-tes "
            "to be installed. Please install py-tes, e.g. via Conda or Pip."
        )

    self.container_image = container_image or get_container_image()
    logger.info(f"Using {self.container_image} for TES jobs.")
    self.container_workdir = "/tmp"
    self.max_status_checks_per_second = max_status_checks_per_second
    self.tes_url = tes_url
    self.tes_client = tes.HTTPClient(url=self.tes_url)
    logger.info("[TES] Job execution on TES: {url}".format(url=self.tes_url))

    super().__init__(
        workflow,
        dag,
        None,
        jobname=jobname,
        printreason=printreason,
        quiet=quiet,
        printshellcmds=printshellcmds,
        cluster_config=cluster_config,
        local_input=local_input,
        restart_times=restart_times,
        assume_shared_fs=assume_shared_fs,
        max_status_checks_per_second=max_status_checks_per_second,
    )
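# The sketch below is not part of the executor; it only illustrates how a
# py-tes client like the tes_client constructed above could be exercised
# directly, assuming py-tes' Task/Executor message classes and its
# HTTPClient.create_task / get_task calls. The URL, image, command, and poll
# interval are placeholders for illustration.
def _example_submit_tes_task(tes_url="http://localhost:8000"):
    import time

    import tes

    client = tes.HTTPClient(url=tes_url)
    task = tes.Task(
        executors=[
            tes.Executor(
                image="snakemake/snakemake:latest",
                command=["snakemake", "--version"],
            )
        ]
    )
    # create_task returns the server-assigned task id.
    task_id = client.create_task(task)
    # Poll with the minimal view until the task leaves the active states.
    while True:
        state = client.get_task(task_id, view="MINIMAL").state
        if state not in ("QUEUED", "INITIALIZING", "RUNNING"):
            return state
        time.sleep(2)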
def dag_to_cwl(dag):
    """Convert a given DAG to a CWL workflow, returned as a JSON-serializable dict."""
    snakemake_cwl = {
        "class": "CommandLineTool",
        "id": "#snakemake-job",
        "label": "Snakemake job executor",
        "hints": [{"dockerPull": get_container_image(), "class": "DockerRequirement"}],
        "baseCommand": "snakemake",
        "requirements": {"ResourceRequirement": {"coresMin": "$(inputs.cores)"}},
        "arguments": [
            "--force",
            "--keep-target-files",
            "--keep-remote",
            "--force-use-threads",
            "--wrapper-prefix",
            dag.workflow.wrapper_prefix,
            "--notemp",
            "--quiet",
            "--use-conda",
            "--no-hooks",
            "--nolock",
            "--mode",
            str(Mode.subprocess),
        ],
        "inputs": {
            "snakefile": {
                "type": "File",
                "default": {
                    "class": "File",
                    "location": os.path.relpath(dag.workflow.snakefile),
                },
                "inputBinding": {"prefix": "--snakefile"},
            },
            "sources": {
                "type": "File[]",
                "default": [
                    {"class": "File", "location": f} for f in dag.workflow.get_sources()
                ],
            },
            "cores": {
                "type": "int",
                "default": 1,
                "inputBinding": {"prefix": "--cores"},
            },
            "rules": {
                "type": "string[]?",
                "inputBinding": {"prefix": "--allowed-rules"},
            },
            "input_files": {"type": "File[]", "default": []},
            "target_files": {"type": "string[]?", "inputBinding": {"position": 0}},
        },
        "outputs": {
            "output_files": {
                "type": {"type": "array", "items": "File"},
                "outputBinding": {"glob": "$(inputs.target_files)"},
            }
        },
    }

    groups = dag.get_jobs_or_groups()
    outputs = []
    inputs = []
    dag_cwl = [job_to_cwl(job, dag, outputs, inputs) for job in groups]

    return {
        "cwlVersion": "v1.0",
        "$graph": [
            snakemake_cwl,
            {
                "class": "Workflow",
                "requirements": {
                    "InlineJavascriptRequirement": {},
                    "MultipleInputFeatureRequirement": {},
                },
                "steps": dag_cwl,
                "inputs": inputs,
                "outputs": outputs,
                "id": "#main",
            },
        ],
    }
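# A minimal usage sketch, not taken from the module above: the dict returned by
# dag_to_cwl() is plain JSON-serializable data, so exporting it as a CWL
# workflow file is just a json.dump away. The output path and the `dag`
# argument are assumptions for illustration.
def _example_export_cwl(dag, path="workflow.cwl"):
    import json

    with open(path, "w") as cwl:
        json.dump(dag_to_cwl(dag), cwl, indent=4)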
def __init__(
    self,
    workflow,
    dag,
    cores,
    jobname="snakejob.{name}.{jobid}.sh",
    printreason=False,
    quiet=False,
    printshellcmds=False,
    container_image=None,
    regions=None,
    location=None,
    cache=False,
    latency_wait=3,
    local_input=None,
    restart_times=None,
    exec_job=None,
    max_status_checks_per_second=1,
    preemption_default=None,
    preemptible_rules=None,
):
    # Attach variables for easy access
    self.workflow = workflow
    self.quiet = quiet
    self.workdir = os.path.dirname(self.workflow.persistence.path)
    self._save_storage_cache = cache

    # Relative path for running on instance
    self._set_snakefile()

    # Prepare workflow sources for build package
    self._set_workflow_sources()

    exec_job = (
        exec_job
        or (
            "snakemake {target} --snakefile %s "
            "--force -j{cores} --keep-target-files --keep-remote "
            "--latency-wait 0 --scheduler {workflow.scheduler_type} "
            "--attempt 1 {use_threads} --max-inventory-time 0 "
            "{overwrite_config} {rules} --nocolor "
            "--notemp --no-hooks --nolock " % self.snakefile
        )
        + self.get_set_threads_args()
        + self.get_set_scatter_args()
    )

    # Set preemptible instances
    self._set_preemptible_rules(preemption_default, preemptible_rules)

    # IMPORTANT: using Compute Engine API and not k8s == no support for secrets
    self.envvars = list(self.workflow.envvars) or []

    # Quit early if we can't authenticate
    self._get_services()
    self._get_bucket()

    # Akin to Kubernetes, create a run namespace, default container image
    self.run_namespace = str(uuid.uuid4())
    self.container_image = container_image or get_container_image()
    self.regions = regions or ["us-east1", "us-west1", "us-central1"]

    # The project name is required, either from client or environment
    self.project = (
        os.environ.get("GOOGLE_CLOUD_PROJECT") or self._bucket_service.project
    )

    # Determine API location based on user preference, and then regions
    self._set_location(location)

    # Tell the user right away the regions, location, and container
    logger.debug("regions=%s" % self.regions)
    logger.debug("location=%s" % self.location)
    logger.debug("container=%s" % self.container_image)

    # Keep track of build packages to clean up at shutdown, and generate
    self._build_packages = set()
    targz = self._generate_build_source_package()
    self._upload_build_source_package(targz)

    # Save default resources to add later, since we need to add custom
    # default resources depending on the instance requested
    self.default_resources = self.workflow.default_resources
    self.workflow.default_resources.args = None

    super().__init__(
        workflow,
        dag,
        None,
        jobname=jobname,
        printreason=printreason,
        quiet=quiet,
        printshellcmds=printshellcmds,
        latency_wait=latency_wait,
        restart_times=restart_times,
        exec_job=exec_job,
        assume_shared_fs=False,
        max_status_checks_per_second=10,
    )
def __init__(
    self,
    workflow,
    dag,
    cores,
    jobname="snakejob.{name}.{jobid}.sh",
    printreason=False,
    quiet=False,
    printshellcmds=False,
    latency_wait=3,
    cluster_config=None,
    local_input=None,
    restart_times=None,
    assume_shared_fs=False,
    max_status_checks_per_second=0.5,
    tes_url=None,
    container_image=None,
):
    import tes

    self.container_image = container_image or get_container_image()
    self.container_workdir = "/tmp"
    self.max_status_checks_per_second = max_status_checks_per_second
    self.tes_url = tes_url
    self.tes_client = tes.HTTPClient(url=self.tes_url)
    logger.info("[TES] Job execution on TES: {url}".format(url=self.tes_url))

    exec_job = "\\\n".join(
        (
            "{envvars} ",
            "mkdir /tmp/conda && cd /tmp && ",
            "snakemake {target} ",
            "--snakefile {snakefile} ",
            "--verbose ",
            "--force -j{cores} ",
            "--keep-target-files ",
            "--keep-remote ",
            "--latency-wait 10 ",
            "--attempt 1 ",
            "{use_threads}",
            "{overwrite_config} {rules} ",
            "--nocolor ",
            "--notemp ",
            "--no-hooks ",
            "--nolock ",
            "--mode {} ".format(Mode.cluster),
        )
    )

    super().__init__(
        workflow,
        dag,
        None,
        jobname=jobname,
        printreason=printreason,
        quiet=quiet,
        printshellcmds=printshellcmds,
        latency_wait=latency_wait,
        cluster_config=cluster_config,
        local_input=local_input,
        restart_times=restart_times,
        exec_job=exec_job,
        assume_shared_fs=assume_shared_fs,
        max_status_checks_per_second=max_status_checks_per_second,
    )
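# Illustration only, not part of the executor: exec_job above is one shell
# command spread over backslash-continued lines, whose {placeholders} are
# substituted per job by the surrounding machinery (not shown here). A plain
# str.format call with hypothetical values shows the shape of the rendered
# command; all argument values below are made up for the example.
def _example_render_exec_job(exec_job):
    return exec_job.format(
        envvars="",
        target="results/all.txt",
        snakefile="/tmp/Snakefile",
        cores=2,
        use_threads="--force-use-threads ",
        overwrite_config="",
        rules="",
    )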
def __init__(
    self,
    workflow,
    dag,
    cores,
    jobname="snakejob.{name}.{jobid}.sh",
    printreason=False,
    quiet=False,
    printshellcmds=False,
    container_image=None,
    regions=None,
    location=None,
    cache=False,
    local_input=None,
    restart_times=None,
    max_status_checks_per_second=1,
    preemption_default=None,
    preemptible_rules=None,
):
    super().__init__(
        workflow,
        dag,
        None,
        jobname=jobname,
        printreason=printreason,
        quiet=quiet,
        printshellcmds=printshellcmds,
        restart_times=restart_times,
        assume_shared_fs=False,
        max_status_checks_per_second=10,
    )

    # Prepare workflow sources for build package
    self._set_workflow_sources()

    # Attach variables for easy access
    self.quiet = quiet
    self.workdir = os.path.realpath(os.path.dirname(self.workflow.persistence.path))
    self._save_storage_cache = cache

    # Set preemptible instances
    self._set_preemptible_rules(preemption_default, preemptible_rules)

    # IMPORTANT: using Compute Engine API and not k8s == no support for secrets
    self.envvars = list(self.workflow.envvars) or []

    # Quit early if we can't authenticate
    self._get_services()
    self._get_bucket()

    # Akin to Kubernetes, create a run namespace, default container image
    self.run_namespace = str(uuid.uuid4())
    self.container_image = container_image or get_container_image()
    logger.info(f"Using {self.container_image} for Google Life Science jobs.")
    self.regions = regions or ["us-east1", "us-west1", "us-central1"]

    # The project name is required, either from client or environment
    self.project = (
        os.environ.get("GOOGLE_CLOUD_PROJECT") or self._bucket_service.project
    )

    # Determine API location based on user preference, and then regions
    self._set_location(location)

    # Tell the user right away the regions, location, and container
    logger.debug("regions=%s" % self.regions)
    logger.debug("location=%s" % self.location)
    logger.debug("container=%s" % self.container_image)

    # Keep track of build packages to clean up at shutdown, and generate
    self._build_packages = set()
    targz = self._generate_build_source_package()
    self._upload_build_source_package(targz)

    # Save default resources to add later, since we need to add custom
    # default resources depending on the instance requested
    self.default_resources = None
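# Hypothetical sketch only: both constructors above delegate preemptible
# handling to _set_preemptible_rules(), whose body is not shown here. One
# plausible shape for its inputs is a global default retry count plus per-rule
# overrides written as "rulename=retries"; the helper below merely illustrates
# parsing that shape and is not the executor's actual implementation.
def _example_parse_preemptible(preemption_default=None, preemptible_rules=None):
    preemptible = {}
    for entry in preemptible_rules or []:
        rule, _, retries = entry.partition("=")
        # Fall back to the global default when no per-rule count is given.
        preemptible[rule] = int(retries) if retries else preemption_default
    return preemption_default, preemptible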