Example #1
import json

# `get_workflow_inputs` and `tempdir` are assumed to be provided by the module
# under test; the snippet is otherwise self-contained.

def test_get_workflow_inputs():
    actual_inputs_dict, inputs_path = get_workflow_inputs({"bar": 1},
                                                          namespace="foo")
    assert inputs_path.exists()
    with open(inputs_path, "rt") as inp:
        assert json.load(inp) == actual_inputs_dict
    assert actual_inputs_dict == {"foo.bar": 1}

    with tempdir() as d:
        inputs_file = d / "inputs.json"
        actual_inputs_dict, inputs_path = get_workflow_inputs({"bar": 1},
                                                              inputs_file,
                                                              "foo")
        assert inputs_file == inputs_path
        assert inputs_path.exists()
        with open(inputs_path, "rt") as inp:
            assert json.load(inp) == actual_inputs_dict
        assert actual_inputs_dict == {"foo.bar": 1}

    with tempdir() as d:
        inputs_file = d / "inputs.json"
        inputs_dict = {"foo.bar": 1}
        with open(inputs_file, "wt") as out:
            json.dump(inputs_dict, out)
        actual_inputs_dict, inputs_path = get_workflow_inputs(
            inputs_file=inputs_file, namespace="foo")
        assert inputs_file == inputs_path
        assert inputs_path.exists()
        with open(inputs_path, "rt") as inp:
            assert json.load(inp) == actual_inputs_dict
        assert actual_inputs_dict == inputs_dict
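
The test pins down the contract of get_workflow_inputs: input keys are prefixed with the workflow namespace, the dict is written to a JSON file (a fresh temp file when no path is given), and a pre-existing inputs file is read back rather than overwritten. A minimal sketch of that contract, under those assumptions (the tested module's real implementation may differ), could look like:

import json
import os
import tempfile
from pathlib import Path
from typing import Optional, Tuple


def get_workflow_inputs_sketch(
        inputs_dict: Optional[dict] = None,
        inputs_file: Optional[Path] = None,
        namespace: Optional[str] = None) -> Tuple[dict, Path]:
    # A pre-existing inputs file is the source of truth and is returned as-is.
    if inputs_file and Path(inputs_file).exists():
        with open(inputs_file, "rt") as inp:
            return json.load(inp), Path(inputs_file)
    # Otherwise, prefix each key with the workflow namespace ...
    prefix = f"{namespace}." if namespace else ""
    namespaced = {f"{prefix}{key}": value
                  for key, value in (inputs_dict or {}).items()}
    # ... and write the result to the given path, or to a new temp file.
    if inputs_file is None:
        fd, tmp = tempfile.mkstemp(suffix=".json")
        os.close(fd)
        inputs_file = Path(tmp)
    with open(inputs_file, "wt") as out:
        json.dump(namespaced, out)
    return namespaced, inputs_file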
Example #2
    def run_workflow(self,
                     wdl_path: Union[str, Path],
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow on given inputs and check that the outputs match
        the given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * workflow_name: The name of the workflow in the WDL script. If None,
                    the name of the WDL script is used (without the .wdl extension).
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * java_args: Additional arguments to pass to Java runtime.
                * cromwell_args: Additional arguments to pass to `cromwell run`.

        Returns:
            Dict of outputs.

        Raises:
            ExecutionFailedError: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """
        workflow_name = self._get_workflow_name(wdl_path, kwargs)

        inputs_dict, inputs_file = get_workflow_inputs(
            inputs, kwargs.get("inputs_file"), workflow_name)

        imports_file = self.get_workflow_imports(kwargs.get("imports_file"))

        inputs_arg = f"-i {inputs_file}" if inputs_dict else ""
        imports_zip_arg = f"-p {imports_file}" if imports_file else ""
        java_args = kwargs.get("java_args", self.java_args) or ""
        cromwell_args = kwargs.get("cromwell_args", self.cromwell_args) or ""
        metadata_file = Path.cwd() / "metadata.json"

        cmd = (
            f"{self.java_bin} {java_args} -jar {self.cromwell_jar_file} run "
            f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
            f"{wdl_path}")
        LOG.info(f"Executing cromwell command '{cmd}' with inputs "
                 f"{json.dumps(inputs_dict, default=str)}")

        exe = subby.run(cmd, raise_on_error=False)

        metadata = None
        if metadata_file.exists():
            with open(metadata_file, "rt") as inp:
                metadata = json.load(inp)

        if exe.ok:
            if metadata:
                assert metadata["status"] == "Succeeded"
                outputs = metadata["outputs"]
            else:
                LOG.warning(
                    f"Cromwell command completed successfully but did not generate "
                    f"a metadata file at {metadata_file}")
                outputs = CromwellExecutor.get_cromwell_outputs(exe.output)
        else:
            error_kwargs = {
                "executor": "cromwell",
                "target": workflow_name,
                "status": "Failed",
                "inputs": inputs_dict,
                "executor_stdout": exe.output,
                "executor_stderr": exe.error,
            }
            if metadata:
                failures = CromwellExecutor.get_failures(metadata)
                if failures:
                    error_kwargs.update({
                        "failed_task": failures.failed_task,
                        "failed_task_exit_status": failures.failed_task_exit_status,
                        "failed_task_stdout": failures.failed_task_stdout,
                        "failed_task_stderr": failures.failed_task_stderr,
                    })
                    if failures.num_failed > 1:
                        error_kwargs["msg"] = \
                            f"cromwell failed on {failures.num_failed} instances of " \
                            f"{failures.failed_task} of {workflow_name}; only " \
                            f"showing output from the first failed task"
                else:
                    error_kwargs["msg"] = \
                        f"cromwell failed on workflow {workflow_name}"
            else:
                error_kwargs["msg"] = \
                    f"Cromwell command failed but did not generate a metadata " \
                    f"file at {metadata_file}"

            raise ExecutionFailedError(**error_kwargs)

        if expected:
            validate_outputs(outputs, expected, workflow_name)

        return outputs
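
The get_failures helper is not shown here, but the attributes accessed in the error path above (num_failed, failed_task, and the exit status/stdout/stderr fields) imply a small record type. A sketch consistent with that usage, with the actual parsing of Cromwell metadata omitted, might be:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Failures:
    # Fields inferred from the attributes used in run_workflow's error path;
    # the real class may carry more information.
    num_failed: int
    failed_task: str
    failed_task_exit_status: Optional[int] = None
    failed_task_stdout: Optional[str] = None
    failed_task_stderr: Optional[str] = None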
Example #3
    def run_workflow(self,
                     wdl_script: Union[str, Path],
                     workflow_name: Optional[str] = None,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow on given inputs and check that the outputs match
        the given expected values.

        Args:
            wdl_script: The WDL script to execute.
            workflow_name: The name of the workflow in the WDL script. If None, the
                name of the WDL script is used (without the .wdl extension).
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * java_args: Additional arguments to pass to Java runtime.
                * cromwell_args: Additional arguments to pass to `cromwell run`.

        Returns:
            Dict of outputs.

        Raises:
            Exception: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """
        wdl_path, workflow_name = get_workflow(
            self.project_root,
            wdl_script,
            workflow_name,
        )

        inputs_dict, inputs_file = get_workflow_inputs(
            workflow_name, inputs, kwargs.get("inputs_file"))

        imports_file = get_workflow_imports(self.import_dirs,
                                            kwargs.get("imports_file"))

        inputs_arg = f"-i {inputs_file}" if inputs_dict else ""
        imports_zip_arg = f"-p {imports_file}" if imports_file else ""
        java_args = kwargs.get("java_args", self.java_args) or ""
        cromwell_args = kwargs.get("cromwell_args", self.cromwell_args) or ""

        cmd = (
            f"{self.java_bin} {java_args} -jar {self.cromwell_jar_file} run "
            f"{cromwell_args} {inputs_arg} {imports_zip_arg} {wdl_path}")
        LOG.info(f"Executing cromwell command '{cmd}' with inputs "
                 f"{json.dumps(inputs_dict, default=str)}")
        exe = delegator.run(cmd, block=True)
        if not exe.ok:
            raise Exception(
                f"Cromwell command failed; stdout={exe.out}; stderr={exe.err}")

        outputs = CromwellExecutor.get_cromwell_outputs(exe.out)

        if expected:
            for name, expected_value in expected.items():
                key = f"{workflow_name}.{name}"
                if key not in outputs:
                    raise AssertionError(
                        f"Workflow did not generate output {key}")
                if isinstance(expected_value, DataFile):
                    expected_value.assert_contents_equal(outputs[key])
                else:
                    assert expected_value == outputs[key]

        return outputs
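
This example checks expected outputs inline, while Examples #2 and #4 delegate to a validate_outputs helper. A minimal sketch of such a helper, mirroring the inline loop above (DataFile is the framework's file-comparison wrapper and is assumed to be importable), could be:

def validate_outputs_sketch(outputs: dict, expected: dict,
                            namespace: str) -> None:
    # Compare each expected value against the actual workflow outputs,
    # namespacing the keys the same way the inline loop does.
    for name, expected_value in expected.items():
        key = f"{namespace}.{name}"
        if key not in outputs:
            raise AssertionError(f"Workflow did not generate output {key}")
        if isinstance(expected_value, DataFile):
            # File outputs are compared by contents, not by path.
            expected_value.assert_contents_equal(outputs[key])
        else:
            assert expected_value == outputs[key], (
                f"Expected {expected_value!r} for {key}, "
                f"got {outputs[key]!r}")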
Example #4
    def run_workflow(self,
                     wdl_path: Path,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow on given inputs and check that the outputs match
        the given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * workflow_name: Name of the workflow to run.
                * task_name: Name of the task to run if a workflow isn't defined.
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.

        Returns:
            Dict of outputs.

        Raises:
            Exception: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """

        doc = CLI.load(str(wdl_path),
                       path=[str(path) for path in self.import_dirs],
                       check_quant=kwargs.get("check_quant", True),
                       read_source=CLI.read_source)

        task = kwargs.get("task_name")
        namespace = None
        if not task:
            if "workflow_name" in kwargs:
                namespace = kwargs["workflow_name"]
            else:
                namespace = doc.workflow.name

        inputs_dict, inputs_file = get_workflow_inputs(
            inputs, kwargs.get("inputs_file"), namespace=namespace)

        target, input_env, input_json = CLI.runner_input(
            doc=doc,
            inputs=[],
            input_file=str(inputs_file),
            empty=[],
            task=task)

        logger = logging.getLogger("miniwdl-run")
        logger.setLevel(CLI.NOTICE_LEVEL)
        CLI.install_coloredlogs(logger)

        _util.ensure_swarm(logger)

        try:
            if isinstance(target, Tree.Task):
                entrypoint = runtime.run_local_task
            else:
                entrypoint = runtime.run_local_workflow
            rundir, output_env = entrypoint(
                target,
                input_env,
                # run_dir=rundir,
                # copy_input_files=copy_input_files,
                # max_workers=max_workers,
            )
        except Error.EvalError as err:  # TODO: test errors
            MiniwdlExecutor.log_source(logger, err)
            raise
        except Error.RuntimeError as err:
            MiniwdlExecutor.log_source(logger, err)

            if isinstance(err, runtime.error.RunFailed):
                # This will be a workflow- or a task-level failure, depending on
                # whether a workflow or task was executed. If it is workflow-level,
                # we need to get the task-level error that caused the workflow to fail.
                if isinstance(err.exe, Tree.Workflow):
                    err = err.__cause__

                task_err = cast(runtime.error.RunFailed, err)
                cause = task_err.__cause__
                failed_task_exit_status = None
                failed_task_stderr = None
                if isinstance(cause, runtime.error.CommandFailed):
                    # If the task failed due to an error in the command, populate the
                    # command exit status and stderr.
                    cmd_err = cast(runtime.error.CommandFailed, cause)
                    failed_task_exit_status = cmd_err.exit_status
                    failed_task_stderr = MiniwdlExecutor.read_miniwdl_command_std(
                        cmd_err.stderr_file)

                raise ExecutionFailedError(
                    "miniwdl",
                    namespace or task,
                    status="Failed",
                    inputs=task_err.exe.inputs,
                    failed_task=task_err.exe.name,
                    failed_task_exit_status=failed_task_exit_status,
                    failed_task_stderr=failed_task_stderr) from err
            else:
                raise

        outputs = CLI.values_to_json(output_env, namespace=target.name)

        if expected:
            validate_outputs(outputs, expected, target.name)

        return outputs
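
For context, a call to this miniwdl-backed variant from a test might look like the following; the executor construction and all file names are hypothetical:

from pathlib import Path

# Hypothetical usage; MiniwdlExecutor's constructor arguments are assumed.
executor = MiniwdlExecutor(import_dirs=[Path("wdl/lib")])

# Passing task_name runs a single task instead of the workflow.
outputs = executor.run_workflow(
    Path("wdl/align.wdl"),
    inputs={"fastq": "tests/data/reads.fq"},
    expected={"bam_md5": "d41d8cd98f00b204e9800998ecf8427e"},
    task_name="align",
    check_quant=False,  # forwarded to CLI.load, as shown above
)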