Example #1
    def __init__(
        self,
        workflow,
        dag,
        cores,
        jobname="snakejob.{name}.{jobid}.sh",
        printreason=False,
        quiet=False,
        printshellcmds=False,
        cluster_config=None,
        local_input=None,
        restart_times=None,
        assume_shared_fs=False,
        max_status_checks_per_second=0.5,
        tes_url=None,
        container_image=None,
    ):
        try:
            import tes
        except ImportError:
            raise WorkflowError(
                "Unable to import Python package tes. TES backend requires py-tes to be installed. Please install py-tes, e.g. via Conda or Pip."
            )

        self.container_image = container_image or get_container_image()
        logger.info(f"Using {self.container_image} for TES jobs.")
        self.container_workdir = "/tmp"
        self.max_status_checks_per_second = max_status_checks_per_second
        self.tes_url = tes_url
        self.tes_client = tes.HTTPClient(url=self.tes_url)

        logger.info(
            "[TES] Job execution on TES: {url}".format(url=self.tes_url))

        super().__init__(
            workflow,
            dag,
            None,
            jobname=jobname,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            cluster_config=cluster_config,
            local_input=local_input,
            restart_times=restart_times,
            assume_shared_fs=assume_shared_fs,
            max_status_checks_per_second=max_status_checks_per_second,
        )
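
For context, a minimal sketch of how the `tes_client` created above is typically used with the py-tes package; the `tes.Task`/`tes.Executor` names follow py-tes, but treat the exact call signatures, URL, image, and command as assumptions rather than part of this executor.

import tes

# Minimal, hypothetical usage of the py-tes client created in __init__ above;
# the URL, image, and command are placeholders, not values from this executor.
client = tes.HTTPClient(url="http://localhost:8000")
task = tes.Task(
    executors=[tes.Executor(image="alpine", command=["echo", "hello TES"])]
)
task_id = client.create_task(task)  # submit the task to the TES endpoint
info = client.get_task(task_id)     # later polled for task state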
Example #2
def dag_to_cwl(dag):
    """Convert a given DAG to a CWL workflow, which is returned as JSON object.
    """
    snakemake_cwl = {
        "class": "CommandLineTool",
        "id": "#snakemake-job",
        "label": "Snakemake job executor",
        "hints": [
            {"dockerPull": get_container_image(), "class": "DockerRequirement"}
        ],
        "baseCommand": "snakemake",
        "requirements": {
            "ResourceRequirement": {
                "coresMin": "$(inputs.cores)"
            }
        },
        "arguments": [
            "--force",
            "--keep-target-files",
            "--keep-remote",
            "--force-use-threads",
            "--wrapper-prefix",
            dag.workflow.wrapper_prefix,
            "--notemp",
            "--quiet",
            "--use-conda",
            "--no-hooks",
            "--nolock",
            "--mode",
            str(Mode.subprocess),
        ],
        "inputs": {
            "snakefile": {
                "type": "File",
                "default": {
                    "class": "File",
                    "location": os.path.relpath(dag.workflow.snakefile),
                },
                "inputBinding": {
                    "prefix": "--snakefile"
                },
            },
            "sources": {
                "type":
                "File[]",
                "default": [{
                    "class": "File",
                    "location": f
                } for f in dag.workflow.get_sources()],
            },
            "cores": {
                "type": "int",
                "default": 1,
                "inputBinding": {
                    "prefix": "--cores"
                },
            },
            "rules": {
                "type": "string[]?",
                "inputBinding": {
                    "prefix": "--allowed-rules"
                },
            },
            "input_files": {
                "type": "File[]",
                "default": []
            },
            "target_files": {
                "type": "string[]?",
                "inputBinding": {
                    "position": 0
                }
            },
        },
        "outputs": {
            "output_files": {
                "type": {
                    "type": "array",
                    "items": "File"
                },
                "outputBinding": {
                    "glob": "$(inputs.target_files)"
                },
            }
        },
    }
    groups = dag.get_jobs_or_groups()
    outputs = []
    inputs = []

    dag_cwl = [job_to_cwl(job, dag, outputs, inputs) for job in groups]

    return {
        "cwlVersion": "v1.0",
        "$graph": [
            snakemake_cwl,
            {
                "class": "Workflow",
                "requirements": {
                    "InlineJavascriptRequirement": {},
                    "MultipleInputFeatureRequirement": {},
                },
                "steps": dag_cwl,
                "inputs": inputs,
                "outputs": outputs,
                "id": "#main",
            },
        ],
    }
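
A minimal sketch of consuming the returned document, assuming an already-constructed Snakemake `dag`; writing it out as JSON makes it usable with a CWL engine such as cwltool.

import json

# `dag` is assumed to be an existing Snakemake DAG; dag_to_cwl() returns a
# plain dict, so it can be serialized directly.
cwl_doc = dag_to_cwl(dag)
with open("workflow.cwl", "w") as fh:
    json.dump(cwl_doc, fh, indent=2)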
Example #3
    def __init__(
        self,
        workflow,
        dag,
        cores,
        jobname="snakejob.{name}.{jobid}.sh",
        printreason=False,
        quiet=False,
        printshellcmds=False,
        container_image=None,
        regions=None,
        location=None,
        cache=False,
        latency_wait=3,
        local_input=None,
        restart_times=None,
        exec_job=None,
        max_status_checks_per_second=1,
        preemption_default=None,
        preemptible_rules=None,
    ):

        # Attach variables for easy access
        self.workflow = workflow
        self.quiet = quiet
        self.workdir = os.path.dirname(self.workflow.persistence.path)
        self._save_storage_cache = cache

        # Relative path for running on instance
        self._set_snakefile()

        # Prepare workflow sources for build package
        self._set_workflow_sources()

        exec_job = (exec_job or
                    ("snakemake {target} --snakefile %s "
                     "--force -j{cores} --keep-target-files --keep-remote "
                     "--latency-wait 0 --scheduler {workflow.scheduler_type} "
                     "--attempt 1 {use_threads} --max-inventory-time 0 "
                     "{overwrite_config} {rules} --nocolor "
                     "--notemp --no-hooks --nolock " % self.snakefile) +
                    self.get_set_threads_args() + self.get_set_scatter_args())

        # Set preemptible instances
        self._set_preemptible_rules(preemption_default, preemptible_rules)

        # IMPORTANT: using Compute Engine API and not k8s == no support for secrets
        self.envvars = list(self.workflow.envvars) or []

        # Quit early if we can't authenticate
        self._get_services()
        self._get_bucket()

        # Akin to Kubernetes, create a run namespace, default container image
        self.run_namespace = str(uuid.uuid4())
        self.container_image = container_image or get_container_image()
        self.regions = regions or ["us-east1", "us-west1", "us-central1"]

        # The project name is required, either from client or environment
        self.project = (os.environ.get("GOOGLE_CLOUD_PROJECT")
                        or self._bucket_service.project)

        # Determine API location based on user preference, and then regions
        self._set_location(location)

        # Tell the user right away the regions, location, and container
        logger.debug("regions=%s" % self.regions)
        logger.debug("location=%s" % self.location)
        logger.debug("container=%s" % self.container_image)

        # Keep track of build packages to clean up at shutdown, and generate
        self._build_packages = set()
        targz = self._generate_build_source_package()
        self._upload_build_source_package(targz)

        # Save default resources to add later, since we need to add custom
        # default resources depending on the instance requested
        self.default_resources = self.workflow.default_resources
        self.workflow.default_resources.args = None

        super().__init__(
            workflow,
            dag,
            None,
            jobname=jobname,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            restart_times=restart_times,
            exec_job=exec_job,
            assume_shared_fs=False,
            max_status_checks_per_second=10,
        )
Example #4
    def __init__(
        self,
        workflow,
        dag,
        cores,
        jobname="snakejob.{name}.{jobid}.sh",
        printreason=False,
        quiet=False,
        printshellcmds=False,
        latency_wait=3,
        cluster_config=None,
        local_input=None,
        restart_times=None,
        assume_shared_fs=False,
        max_status_checks_per_second=0.5,
        tes_url=None,
        container_image=None,
    ):
        import tes

        self.container_image = container_image or get_container_image()
        self.container_workdir = "/tmp"
        self.max_status_checks_per_second = max_status_checks_per_second
        self.tes_url = tes_url
        self.tes_client = tes.HTTPClient(url=self.tes_url)

        logger.info(
            "[TES] Job execution on TES: {url}".format(url=self.tes_url))

        exec_job = "\\\n".join((
            "{envvars} ",
            "mkdir /tmp/conda && cd /tmp && ",
            "snakemake {target} ",
            "--snakefile {snakefile} ",
            "--verbose ",
            "--force -j{cores} ",
            "--keep-target-files ",
            "--keep-remote ",
            "--latency-wait 10 ",
            "--attempt 1 ",
            "{use_threads}",
            "{overwrite_config} {rules} ",
            "--nocolor ",
            "--notemp ",
            "--no-hooks ",
            "--nolock ",
            "--mode {} ".format(Mode.cluster),
        ))

        super().__init__(
            workflow,
            dag,
            None,
            jobname=jobname,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            cluster_config=cluster_config,
            local_input=local_input,
            restart_times=restart_times,
            exec_job=exec_job,
            assume_shared_fs=assume_shared_fs,
            max_status_checks_per_second=max_status_checks_per_second,
        )
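
The `exec_job` string above is a plain format template; the placeholders ({target}, {snakefile}, {cores}, ...) are filled in per job by the base executor. A small illustration of that expansion, with purely hypothetical values:

# Hypothetical values for illustration only; in Snakemake the base executor
# performs this substitution using the actual job's properties.
cmd = exec_job.format(
    envvars="",
    target="results/all.txt",
    snakefile="Snakefile",
    cores=1,
    use_threads="--force-use-threads",
    overwrite_config="",
    rules="",
)
print(cmd)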
Example #5
    def __init__(
        self,
        workflow,
        dag,
        cores,
        jobname="snakejob.{name}.{jobid}.sh",
        printreason=False,
        quiet=False,
        printshellcmds=False,
        container_image=None,
        regions=None,
        location=None,
        cache=False,
        local_input=None,
        restart_times=None,
        max_status_checks_per_second=1,
        preemption_default=None,
        preemptible_rules=None,
    ):
        super().__init__(
            workflow,
            dag,
            None,
            jobname=jobname,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            restart_times=restart_times,
            assume_shared_fs=False,
            max_status_checks_per_second=10,
        )
        # Prepare workflow sources for build package
        self._set_workflow_sources()

        # Attach variables for easy access
        self.quiet = quiet
        self.workdir = os.path.realpath(
            os.path.dirname(self.workflow.persistence.path))
        self._save_storage_cache = cache

        # Set preemptible instances
        self._set_preemptible_rules(preemption_default, preemptible_rules)

        # IMPORTANT: using Compute Engine API and not k8s == no support for secrets
        self.envvars = list(self.workflow.envvars) or []

        # Quit early if we can't authenticate
        self._get_services()
        self._get_bucket()

        # Akin to Kubernetes, create a run namespace, default container image
        self.run_namespace = str(uuid.uuid4())
        self.container_image = container_image or get_container_image()
        logger.info(
            f"Using {self.container_image} for Google Life Science jobs.")
        self.regions = regions or ["us-east1", "us-west1", "us-central1"]

        # The project name is required, either from client or environment
        self.project = (os.environ.get("GOOGLE_CLOUD_PROJECT")
                        or self._bucket_service.project)

        # Determine API location based on user preference, and then regions
        self._set_location(location)

        # Tell the user right away the regions, location, and container
        logger.debug("regions=%s" % self.regions)
        logger.debug("location=%s" % self.location)
        logger.debug("container=%s" % self.container_image)

        # Keep track of build packages to clean up at shutdown, and generate
        self._build_packages = set()
        targz = self._generate_build_source_package()
        self._upload_build_source_package(targz)

        # We need to add custom default resources depending on the
        # instance requested
        self.default_resources = None