예제 #1
0
    def run_workflow(self,
                     wdl_path: Path,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow (or a single task) locally with miniwdl on the given
        inputs, and check that the output matches the given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to
                miniwdl as the workflow/task inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging. All of
                them are also forwarded to `get_target_name`:
                * workflow_name: Name of the workflow to run.
                * task_name: Name of the task to run if a workflow isn't defined.
                * inputs_file: Path to the inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * check_quant: Passed to `CLI.load`; defaults to True.
                * copy_input_files: Passed to the miniwdl entry point; defaults
                    to False.

        Returns:
            Dict of outputs.

        Raises:
            ExecutionFailedError: if miniwdl reports a `RunFailed` error.
            Error.EvalError: re-raised as-is after its source is logged.
            Error.RuntimeError: any runtime error other than `RunFailed` is
                re-raised as-is after its source is logged.
            AssertionError: if the actual outputs don't match the expected outputs
        """

        wdl_doc = CLI.load(str(wdl_path),
                           path=[str(path) for path in self._import_dirs],
                           check_quant=kwargs.get("check_quant", True),
                           read_source=CLI.read_source)

        namespace, is_task = get_target_name(wdl_doc=wdl_doc, **kwargs)

        # Honor the documented `inputs_file` keyword argument. Only the path of
        # the inputs file is needed here; miniwdl re-reads the inputs from it.
        _, inputs_file = read_write_inputs(
            inputs_file=kwargs.get("inputs_file"),
            inputs_dict=inputs,
            namespace=namespace if not is_task else None,
        )

        target, input_env, _ = CLI.runner_input(
            doc=wdl_doc,
            inputs=[],
            input_file=str(inputs_file) if inputs_file else None,
            empty=[],
            task=namespace if is_task else None)

        logger = logging.getLogger("miniwdl-run")
        logger.setLevel(CLI.NOTICE_LEVEL)
        CLI.install_coloredlogs(logger)

        # initialize Docker
        client = docker.from_env()
        try:
            logger.debug("dockerd :: " + json.dumps(client.version())[1:-1])
            _util.initialize_local_docker(logger, client)
        finally:
            client.close()

        try:
            if isinstance(target, Tree.Task):
                entrypoint = runtime.run_local_task
            else:
                entrypoint = runtime.run_local_workflow

            rundir, output_env = entrypoint(target,
                                            input_env,
                                            copy_input_files=kwargs.get(
                                                "copy_input_files", False))
        except Error.EvalError as err:  # TODO: test errors
            MiniwdlExecutor.log_source(logger, err)
            raise
        except Error.RuntimeError as err:
            MiniwdlExecutor.log_source(logger, err)

            if not isinstance(err, runtime.error.RunFailed):
                raise

            # This will be a workflow- or a task-level failure, depending on
            # whether a workflow or task was executed. If it is workflow-level,
            # descend the cause chain to the task-level error that made the
            # workflow fail. Only descend while the cause is itself a RunFailed,
            # so that a workflow failing for another reason (or without a
            # recorded cause) is still reported instead of raising an
            # AttributeError that would mask the real failure.
            task_err = err
            while (isinstance(task_err.exe, Tree.Workflow) and isinstance(
                    task_err.__cause__, runtime.error.RunFailed)):
                task_err = task_err.__cause__

            cause = task_err.__cause__
            failed_task_exit_status = None
            failed_task_stderr = None
            if isinstance(cause, runtime.error.CommandFailed):
                # If the task failed due to an error in the command, populate the
                # command exit status and stderr.
                failed_task_exit_status = cause.exit_status
                failed_task_stderr = MiniwdlExecutor.read_miniwdl_command_std(
                    cause.stderr_file)

            raise ExecutionFailedError(
                "miniwdl",
                namespace,
                status="Failed",
                inputs=task_err.exe.inputs,
                failed_task=task_err.exe.name,
                failed_task_exit_status=failed_task_exit_status,
                failed_task_stderr=failed_task_stderr) from task_err

        outputs = CLI.values_to_json(output_env, namespace=target.name)

        if expected:
            self._validate_outputs(outputs, expected, target.name)

        return outputs
예제 #2
0
    def run_workflow(self,
                     wdl_path: Path,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow with the Cromwell command line (`cromwell run`) on
        the given inputs, and check that the output matches the given expected
        values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging. All of
                them are also forwarded to `get_target_name`:
                * workflow_name: The name of the workflow in the WDL script. If None,
                    the name of the WDL script is used (without the .wdl extension).
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * java_args: Additional arguments to pass to Java runtime;
                    defaults to `self.java_args`.
                * cromwell_args: Additional arguments to pass to `cromwell run`;
                    defaults to `self._cromwell_args`.

        Returns:
            Dict of outputs.

        Raises:
            ValueError: if the resolved target is a task rather than a workflow.
            ExecutionFailedError: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected
                outputs, or if Cromwell exited successfully but its metadata
                reports a status other than "Succeeded".
        """
        target, is_task = get_target_name(wdl_path=wdl_path,
                                          import_dirs=self._import_dirs,
                                          **kwargs)
        if is_task:
            raise ValueError(
                "Cromwell cannot execute tasks independently of a workflow")

        # Honor the documented `inputs_file` keyword argument.
        inputs_dict, inputs_file = read_write_inputs(
            inputs_file=kwargs.get("inputs_file"),
            inputs_dict=inputs,
            namespace=target)

        imports_file = self._get_workflow_imports(kwargs.get("imports_file"))
        inputs_arg = f"-i {inputs_file}" if inputs_file else ""
        imports_zip_arg = f"-p {imports_file}" if imports_file else ""
        # `or ""` guards against an explicit None in kwargs or on the instance.
        java_args = kwargs.get("java_args", self.java_args) or ""
        cromwell_args = kwargs.get("cromwell_args", self._cromwell_args) or ""
        # Cromwell is asked (via -m) to write run metadata to the working directory.
        metadata_file = Path.cwd() / "metadata.json"

        cmd = (
            f"{self.java_bin} {java_args} -jar {self._cromwell_jar_file} run "
            f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
            f"{wdl_path}")
        LOG.info(f"Executing cromwell command '{cmd}' with inputs "
                 f"{json.dumps(inputs_dict, default=str)}")

        # Failures are inspected below instead of being raised by subby.
        exe = subby.run(cmd, raise_on_error=False)

        metadata = None
        if metadata_file.exists():
            with open(metadata_file, "rt") as inp:
                metadata = json.load(inp)

        if exe.ok:
            if metadata:
                assert metadata["status"] == "Succeeded"
                outputs = metadata["outputs"]
            else:
                # Fall back to extracting the outputs from Cromwell's stdout.
                LOG.warning(
                    "Cromwell command completed successfully but did not generate "
                    f"a metadata file at {metadata_file}")
                outputs = self._get_cromwell_outputs(exe.output)
        else:
            error_kwargs = {
                "executor": "cromwell",
                "target": target,
                "status": "Failed",
                "inputs": inputs_dict,
                "executor_stdout": exe.output,
                "executor_stderr": exe.error,
            }
            if metadata:
                failures = self._get_failures(metadata)
                if failures:
                    error_kwargs.update({
                        "failed_task":
                        failures.failed_task,
                        "failed_task_exit_status":
                        failures.failed_task_exit_status,
                        "failed_task_stdout":
                        failures.failed_task_stdout,
                        "failed_task_stderr":
                        failures.failed_task_stderr
                    })
                    if failures.num_failed > 1:
                        error_kwargs["msg"] = (
                            f"cromwell failed on {failures.num_failed} instances of "
                            f"{failures.failed_task} of {target}; only "
                            "showing output from the first failed task")
                else:
                    error_kwargs[
                        "msg"] = f"cromwell failed on workflow {target}"
            else:
                error_kwargs["msg"] = (
                    "Cromwell command failed but did not generate a metadata "
                    f"file at {metadata_file}")

            raise ExecutionFailedError(**error_kwargs)

        if expected:
            self._validate_outputs(outputs, expected, target)

        return outputs
예제 #3
0
    def run_workflow(
        self,
        wdl_path: Path,
        inputs: Optional[dict] = None,
        expected: Optional[dict] = None,
        **kwargs,
    ) -> dict:
        """
        Submit a WDL workflow to a Cromwell server, wait until it reaches a
        terminal state, and check that the output matches the given expected
        values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging. All of
                them are also forwarded to `get_target_name`:
                * workflow_name: The name of the workflow in the WDL script. If None,
                    the name of the WDL script is used (without the .wdl extension).
                * inputs_file: Path to the Cromwell inputs file to use.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * timeout: Passed to `_poll_until_terminal`; defaults to
                    `DEFAULT_POLLING_TIMEOUT`.

        Returns:
            Dict of outputs.

        Raises:
            ValueError: if the resolved target is a task rather than a workflow.
            ExecutionFailedError: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """
        target, is_task = get_target_name(wdl_path=wdl_path,
                                          import_dirs=self._import_dirs,
                                          **kwargs)

        if is_task:
            raise ValueError(
                "Cromwell cannot execute tasks independently of a workflow")

        # Inputs are sent inline in the request, so no formatted inputs file
        # is requested and the returned path is not needed.
        inputs_dict, _ = read_write_inputs(
            inputs_file=kwargs.get("inputs_file"),
            inputs_dict=inputs,
            namespace=target,
            write_formatted_inputs=False)

        payload = {}
        # Every file opened for the request is tracked here so that all of them
        # are closed once the submission is done, whether it succeeded or not.
        payload_files = []

        def open_payload_file(path: Path, mode: str = "r") -> IO:
            """Open `path` and register the handle for cleanup."""
            open_file = open(path, mode)
            payload_files.append(open_file)
            return open_file

        try:
            payload["workflowSource"] = open_payload_file(wdl_path)

            if inputs_dict:
                payload["workflowInputs"] = json.dumps(inputs_dict,
                                                       default=str)

            imports_file = self._get_workflow_imports(
                self._import_dirs, kwargs.get("imports_file"))

            if imports_file:
                payload["workflowDependencies"] = open_payload_file(
                    imports_file, "rb")

            workflow_options = self._cromwell_config_file
            if workflow_options:
                # Decide on the options value itself (not on the inputs): a
                # dict is serialized to JSON, anything else is treated as a
                # path to a file that is uploaded as-is.
                if isinstance(workflow_options, dict):
                    payload["workflowOptions"] = json.dumps(
                        workflow_options, default=str)
                else:
                    payload["workflowOptions"] = open_payload_file(
                        workflow_options)

            LOG.info(
                f"Executing cromwell server '{self._cromwell_api_url}' with inputs "
                f"{json.dumps(inputs_dict, default=str)}")

            with requests.post(self._cromwell_api_url,
                               files=payload,
                               auth=self._auth) as resp:
                status_object = self._resp_to_json(resp, target, inputs_dict)
                run_id = status_object["id"]
                LOG.info(
                    f"Executing on cromwell with id {run_id}. Waiting until terminal "
                    f"state is reached")
        finally:
            for fh in payload_files:
                try:
                    fh.close()
                except Exception:
                    # Best effort: a failure to close one file must not prevent
                    # closing the others or hide an error from the submission.
                    LOG.exception("Error closing file %s", fh)

        self._poll_until_terminal(
            run_id, target, inputs_dict,
            kwargs.get("timeout", DEFAULT_POLLING_TIMEOUT))

        metadata_url = f"{self._cromwell_api_url}/{run_id}/metadata"

        with requests.get(metadata_url, auth=self._auth) as metadata_response:
            metadata = self._resp_to_json(metadata_response, target,
                                          inputs_dict)

            if metadata["status"] != "Succeeded":
                error_kwargs = {
                    "executor": "cromwell",
                    "target": target,
                    "status": "Failed",
                    "inputs": inputs_dict,
                }
                # Called with the dict to be used for the exception, just
                # before it is raised.
                self._parse_metadata_errors(metadata,
                                            target=target,
                                            error_kwargs=error_kwargs)
                raise ExecutionFailedError(**error_kwargs)

            outputs = metadata["outputs"]

        if expected:
            self._validate_outputs(outputs, expected, target)

        return outputs