def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Run a WDL workflow on given inputs, and check that the output matches
    given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to miniwdl
            as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: Name of the workflow to run.
            * task_name: Name of the task to run if a workflow isn't defined.
            * inputs_file: Path to the inputs JSON file to use. Inputs are
                written to this file only if it doesn't exist.
            * check_quant: Whether miniwdl should enforce strict quantifier
                type-checking (defaults to True).
            * copy_input_files: Whether to copy input files into the task
                working directory rather than mounting them (defaults to False).

    Returns:
        Dict of outputs.

    Raises:
        ExecutionFailedError: if a task or workflow failed while executing with
            miniwdl (other miniwdl errors are re-raised as-is)
        AssertionError: if the actual outputs don't match the expected outputs
    """
    wdl_doc = CLI.load(
        str(wdl_path),
        path=[str(path) for path in self._import_dirs],
        check_quant=kwargs.get("check_quant", True),
        read_source=CLI.read_source,
    )

    namespace, is_task = get_target_name(wdl_doc=wdl_doc, **kwargs)

    inputs_dict, inputs_file = read_write_inputs(
        inputs_file=kwargs.get("inputs_file"),
        inputs_dict=inputs,
        namespace=namespace if not is_task else None,
    )

    target, input_env, input_json = CLI.runner_input(
        doc=wdl_doc,
        inputs=[],
        input_file=str(inputs_file) if inputs_file else None,
        empty=[],
        task=namespace if is_task else None,
    )

    logger = logging.getLogger("miniwdl-run")
    logger.setLevel(CLI.NOTICE_LEVEL)
    CLI.install_coloredlogs(logger)

    # initialize Docker
    client = docker.from_env()
    try:
        logger.debug("dockerd :: " + json.dumps(client.version())[1:-1])
        _util.initialize_local_docker(logger, client)
    finally:
        client.close()

    try:
        if isinstance(target, Tree.Task):
            entrypoint = runtime.run_local_task
        else:
            entrypoint = runtime.run_local_workflow

        rundir, output_env = entrypoint(
            target,
            input_env,
            copy_input_files=kwargs.get("copy_input_files", False),
        )
    except Error.EvalError as err:
        # TODO: test errors
        MiniwdlExecutor.log_source(logger, err)
        raise
    except Error.RuntimeError as err:
        MiniwdlExecutor.log_source(logger, err)

        if isinstance(err, runtime.error.RunFailed):
            # This will be a workflow- or a task-level failure, depending on
            # whether a workflow or task was executed. If it is workflow-level,
            # we need to get the task-level error that caused the workflow to
            # fail.
            if isinstance(err.exe, Tree.Workflow):
                err = err.__cause__

            task_err = cast(runtime.error.RunFailed, err)
            cause = task_err.__cause__

            failed_task_exit_status = None
            failed_task_stderr = None
            if isinstance(cause, runtime.error.CommandFailed):
                # If the task failed due to an error in the command, populate
                # the command exit status and stderr.
                cmd_err = cast(runtime.error.CommandFailed, cause)
                failed_task_exit_status = cmd_err.exit_status
                failed_task_stderr = MiniwdlExecutor.read_miniwdl_command_std(
                    cmd_err.stderr_file
                )

            raise ExecutionFailedError(
                "miniwdl",
                namespace,
                status="Failed",
                inputs=task_err.exe.inputs,
                failed_task=task_err.exe.name,
                failed_task_exit_status=failed_task_exit_status,
                failed_task_stderr=failed_task_stderr,
            ) from err
        else:
            raise

    outputs = CLI.values_to_json(output_env, namespace=target.name)

    if expected:
        self._validate_outputs(outputs, expected, target.name)

    return outputs
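
# Usage sketch (illustrative, not part of the executor code above): how a test
# might call `run_workflow` and inspect task-level failure details. Only
# `run_workflow`, its arguments, and `ExecutionFailedError` come from the code
# above; the executor instance, WDL path, and input/output names are assumptions
# for the example, and the error attributes are assumed to mirror the
# constructor arguments used above.
def _example_run_and_check(executor, wdl_path: Path) -> dict:
    try:
        # Run the workflow and let the executor assert the expected outputs.
        return executor.run_workflow(
            wdl_path,
            inputs={"my_workflow.sample_name": "sample1"},      # assumed input name
            expected={"my_workflow.output_md5": "abc123"},      # assumed output name
        )
    except ExecutionFailedError as err:
        # On failure, the executor attaches task-level details when available;
        # getattr is used here since the attribute names are an assumption.
        print(getattr(err, "failed_task", None),
              getattr(err, "failed_task_exit_status", None))
        print(getattr(err, "failed_task_stderr", None))
        raise
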
def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Run a WDL workflow on given inputs, and check that the output matches
    given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to Cromwell
            as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: The name of the workflow in the WDL script. If None,
                the name of the WDL script is used (without the .wdl extension).
            * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                written to this file only if it doesn't exist.
            * imports_file: Path to the WDL imports file to use. Imports are
                written to this file only if it doesn't exist.
            * java_args: Additional arguments to pass to the Java runtime.
            * cromwell_args: Additional arguments to pass to `cromwell run`.

    Returns:
        Dict of outputs.

    Raises:
        ExecutionFailedError: if there was an error executing Cromwell
        AssertionError: if the actual outputs don't match the expected outputs
    """
    target, is_task = get_target_name(
        wdl_path=wdl_path, import_dirs=self._import_dirs, **kwargs
    )

    if is_task:
        raise ValueError(
            "Cromwell cannot execute tasks independently of a workflow"
        )

    inputs_dict, inputs_file = read_write_inputs(
        inputs_file=kwargs.get("inputs_file"), inputs_dict=inputs, namespace=target
    )

    imports_file = self._get_workflow_imports(kwargs.get("imports_file"))
    inputs_arg = f"-i {inputs_file}" if inputs_file else ""
    imports_zip_arg = f"-p {imports_file}" if imports_file else ""
    java_args = kwargs.get("java_args", self.java_args) or ""
    cromwell_args = kwargs.get("cromwell_args", self._cromwell_args) or ""
    metadata_file = Path.cwd() / "metadata.json"

    cmd = (
        f"{self.java_bin} {java_args} -jar {self._cromwell_jar_file} run "
        f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
        f"{wdl_path}"
    )
    LOG.info(
        f"Executing cromwell command '{cmd}' with inputs "
        f"{json.dumps(inputs_dict, default=str)}"
    )

    exe = subby.run(cmd, raise_on_error=False)

    metadata = None
    if metadata_file.exists():
        with open(metadata_file, "rt") as inp:
            metadata = json.load(inp)

    if exe.ok:
        if metadata:
            assert metadata["status"] == "Succeeded"
            outputs = metadata["outputs"]
        else:
            LOG.warning(
                f"Cromwell command completed successfully but did not generate "
                f"a metadata file at {metadata_file}"
            )
            outputs = self._get_cromwell_outputs(exe.output)
    else:
        error_kwargs = {
            "executor": "cromwell",
            "target": target,
            "status": "Failed",
            "inputs": inputs_dict,
            "executor_stdout": exe.output,
            "executor_stderr": exe.error,
        }
        if metadata:
            failures = self._get_failures(metadata)
            if failures:
                error_kwargs.update({
                    "failed_task": failures.failed_task,
                    "failed_task_exit_status": failures.failed_task_exit_status,
                    "failed_task_stdout": failures.failed_task_stdout,
                    "failed_task_stderr": failures.failed_task_stderr,
                })
                if failures.num_failed > 1:
                    error_kwargs["msg"] = (
                        f"cromwell failed on {failures.num_failed} instances of "
                        f"{failures.failed_task} of {target}; only "
                        f"showing output from the first failed task"
                    )
            else:
                error_kwargs["msg"] = f"cromwell failed on workflow {target}"
        else:
            error_kwargs["msg"] = (
                f"Cromwell command failed but did not generate a metadata "
                f"file at {metadata_file}"
            )

        raise ExecutionFailedError(**error_kwargs)

    if expected:
        self._validate_outputs(outputs, expected, target)

    return outputs
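
# Sketch of the kind of parsing `_get_failures` performs on the metadata file
# (this is not the actual helper). Cromwell's metadata JSON contains a "calls"
# mapping of "workflow.task" names to call attempts, each with "executionStatus",
# "returnCode", and "stderr" fields; those key names are standard Cromwell
# metadata keys, while the function name and return shape here are illustrative
# assumptions.
def _example_first_failed_call(metadata: dict) -> Optional[dict]:
    failed = [
        {
            "task": task_name,
            "exit_status": call.get("returnCode"),
            "stderr_file": call.get("stderr"),
        }
        for task_name, calls in metadata.get("calls", {}).items()
        for call in calls
        if call.get("executionStatus") == "Failed"
    ]
    # Mirror the behavior described above: when several shards or attempts fail,
    # report only the first failed call.
    return failed[0] if failed else None
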
def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Run a WDL workflow on given inputs, and check that the output matches
    given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to Cromwell
            as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: The name of the workflow in the WDL script. If None,
                the name of the WDL script is used (without the .wdl extension).
            * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                written to this file only if it doesn't exist.
            * imports_file: Path to the WDL imports file to use. Imports are
                written to this file only if it doesn't exist.
            * timeout: Maximum time (in seconds) to wait for the workflow to
                reach a terminal state.

    Returns:
        Dict of outputs.

    Raises:
        ExecutionFailedError: if there was an error executing Cromwell
        AssertionError: if the actual outputs don't match the expected outputs
    """
    target, is_task = get_target_name(
        wdl_path=wdl_path, import_dirs=self._import_dirs, **kwargs
    )

    if is_task:
        raise ValueError(
            "Cromwell cannot execute tasks independently of a workflow"
        )

    inputs_dict, _ = read_write_inputs(
        inputs_file=kwargs.get("inputs_file"),
        inputs_dict=inputs,
        namespace=target,
        write_formatted_inputs=False,
    )

    payload = {}
    payload_files = []

    def open_payload_file(path: Path, mode: str = "r") -> IO:
        open_file = open(path, mode)
        payload_files.append(open_file)
        return open_file

    try:
        payload["workflowSource"] = open_payload_file(wdl_path)

        if inputs_dict:
            payload["workflowInputs"] = json.dumps(inputs_dict, default=str)

        imports_file = self._get_workflow_imports(
            self._import_dirs, kwargs.get("imports_file")
        )
        if imports_file:
            payload["workflowDependencies"] = open_payload_file(imports_file, "rb")

        if self._cromwell_config_file:
            if isinstance(self._cromwell_config_file, dict):
                payload["workflowOptions"] = json.dumps(
                    self._cromwell_config_file, default=str
                )
            else:
                payload["workflowOptions"] = open_payload_file(
                    self._cromwell_config_file
                )

        LOG.info(
            f"Executing cromwell server '{self._cromwell_api_url}' with inputs "
            f"{json.dumps(inputs_dict, default=str)}"
        )

        with requests.post(
            self._cromwell_api_url, files=payload, auth=self._auth
        ) as resp:
            status_object = self._resp_to_json(resp, target, inputs_dict)
            run_id = status_object["id"]
            LOG.info(
                f"Executing on cromwell with id {run_id}. Waiting until terminal "
                f"state is reached"
            )
    finally:
        for fh in payload_files:
            try:
                fh.close()
            except Exception:
                LOG.exception("Error closing file %s", fh)

    self._poll_until_terminal(
        run_id, target, inputs_dict, kwargs.get("timeout", DEFAULT_POLLING_TIMEOUT)
    )

    metadata_url = f"{self._cromwell_api_url}/{run_id}/metadata"
    outputs = None

    with requests.get(metadata_url, auth=self._auth) as metadata_response:
        metadata = self._resp_to_json(metadata_response, target, inputs_dict)

        if metadata["status"] == "Succeeded":
            outputs = metadata["outputs"]
        else:
            error_kwargs = {
                "executor": "cromwell",
                "target": target,
                "status": "Failed",
                "inputs": inputs_dict,
            }
            self._parse_metadata_errors(
                metadata, target=target, error_kwargs=error_kwargs
            )
            raise ExecutionFailedError(**error_kwargs)

    if expected:
        self._validate_outputs(outputs, expected, target)

    return outputs
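
# Sketch of the polling step used above (this is not the actual
# `_poll_until_terminal` helper): repeatedly query the Cromwell REST API's
# status endpoint until the run reaches a terminal state or the timeout elapses.
# The "Succeeded"/"Failed"/"Aborted" state names and the `{id}/status` endpoint
# come from Cromwell's public API; the function signature, sleep interval, and
# timeout handling below are illustrative assumptions.
import time


def _example_poll_until_terminal(
    api_url: str, run_id: str, auth, timeout: int, interval: int = 5
) -> str:
    deadline = time.time() + timeout
    while time.time() < deadline:
        with requests.get(f"{api_url}/{run_id}/status", auth=auth) as resp:
            status = resp.json()["status"]
        if status in ("Succeeded", "Failed", "Aborted"):
            return status
        time.sleep(interval)
    raise TimeoutError(f"Cromwell run {run_id} did not finish within {timeout}s")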