def diff_default(file1: Path, file2: Path) -> int:
    """
    Default diff command.

    Args:
        file1: First file to compare
        file2: Second file to compare

    Returns:
        Number of different lines.
    """
    with tempdir() as temp:
        # Normalize both inputs first: strip trailing whitespace and ensure
        # each file ends with a newline.
        normalized1 = temp / "file1"
        normalized2 = temp / "file2"
        for src, dst in ((file1, normalized1), (file2, normalized2)):
            subby.run("sed 's/[[:space:]]*$//; $a\\'", stdin=src, stdout=dst)

        # Side-by-side diff piped through grep to count differing lines. GNU
        # diff offers `--ignore-trailing-space`, but macOS ships BSD diff,
        # which lacks it — hence the sed normalization above.
        pipeline = [
            f"diff -y --suppress-common-lines {normalized1} {normalized2}",
            "grep -c '^'"
        ]
        # grep exits 1 when no lines match, which is a valid zero-diff result.
        return int(subby.sub(pipeline, allowed_return_codes=(0, 1)))
def test_allowed_returncodes(mode, expected):
    """Return codes outside the allowed set raise; whitelisted ones do not."""
    # grep exits 1 when nothing matches, so subby raises by default.
    with pytest.raises(subprocess.CalledProcessError):
        subby.run("echo foo | grep -c bar", mode=mode)
    # Whitelisting return code 1 turns the zero-match case into a normal result.
    result = subby.run(
        "echo foo | grep -c bar", mode=mode, allowed_return_codes=(0, 1)
    )
    assert result.output == expected
def test_run_str_command(mode, expected):
    """A string pipeline with stdout redirected to a file round-trips data."""
    with isolated_dir():
        proc = subby.run(
            "echo -n 'foo' | gzip",
            stdout=Path("foo.txt.gz"),
            block=True,
            mode=mode,
        )
        assert proc.done and proc.closed
        # Decompress what was written and compare against the expected output.
        roundtrip = subby.run("gunzip -c foo.txt.gz | cat", block=True, mode=mode)
        assert roundtrip.output == expected
def test_run(mode, expected):
    """A list-of-commands pipeline with stdout redirection round-trips data."""
    with isolated_dir():
        proc = subby.run(
            ["echo -n 'foo'", "gzip"],
            stdout=Path("foo.txt.gz"),
            block=True,
            mode=mode,
        )
        assert proc.done and proc.closed
        # Decompress what was written and compare against the expected output.
        roundtrip = subby.run(
            ["gunzip -c foo.txt.gz", "cat"], block=True, mode=mode
        )
        assert roundtrip.output == expected
def test_run_noblock(mode, expected):
    """Non-blocking run exposes stream types and completes on block()."""
    with isolated_dir():
        proc = subby.run(
            ["echo -n 'foo'", "gzip"],
            stdout=Path("foo.txt.gz"),
            block=False,
            mode=mode,
        )
        assert not proc.done
        # stdin is untouched, stdout is redirected to the file, and stderr is
        # captured through a pipe.
        assert proc.stdin_type is subby.StdType.OTHER
        assert proc.stdout_type is subby.StdType.FILE
        assert proc.stderr_type is subby.StdType.PIPE
        proc.block()
        assert proc.done and proc.closed
        roundtrip = subby.run(
            ["gunzip -c foo.txt.gz", "cat"], block=True, mode=mode
        )
        assert roundtrip.output == expected
def test_readme_examples():
    """Exercise the equivalent command forms shown in the README."""
    # Input passed to the stdin of the first command in the pipeline.
    input_str = "foo\nbar"
    # Three equivalent spellings of the same two-stage pipeline; each returns
    # a `subby.Processes` object that can be used to inspect and control the
    # process(es).
    as_lists = subby.run([["grep", "foo"], ["wc", "-l"]], stdin=input_str)
    as_tuple = subby.run(("grep foo", "wc -l"), stdin=input_str)
    as_string = subby.run("grep foo | wc -l", stdin=input_str)
    # `done` reports whether the processes have finished.
    assert as_lists.done and as_tuple.done and as_string.done
    # `output` provides the captured stdout of the pipeline.
    assert as_lists.output == as_tuple.output == as_string.output == "1"
def test_cromwell_server_workflow(user_config, workflow_data, workflow_runner):
    """
    Start a Cromwell server, run a workflow against it through the
    'cromwell-server' executor, and always tear the server down afterwards.
    """
    defaults = user_config.get_executor_defaults("cromwell")
    cromwell_jar_file = defaults.get(
        "cromwell_jar_file", os.environ.get(ENV_CROMWELL_JAR)
    )
    # BUG FIX: the original passed `os.environ.get(ENV_JAVA_HOME) + "/bin/java"`
    # as the eager default to dict.get(), which raises
    # TypeError (None + str) whenever JAVA_HOME is unset — even when the
    # config supplies an explicit 'java_bin'. Compute the fallback lazily.
    java_jar = defaults.get("java_bin")
    if java_jar is None:
        java_home = os.environ.get(ENV_JAVA_HOME)
        if java_home is None:
            raise RuntimeError(
                f"Either the 'java_bin' executor default or the "
                f"{ENV_JAVA_HOME} environment variable must be set"
            )
        java_jar = java_home + "/bin/java"
    p = subby.run(
        f"{java_jar} -jar {cromwell_jar_file} server | tee /dev/stderr",
        block=False
    )
    # Give the server time to come up before submitting the workflow.
    time.sleep(10)
    inputs = {
        "in_txt": workflow_data["in_txt"],
        "in_int": 1
    }
    outputs = {
        "out_txt": workflow_data["out_txt"],
        "out_int": 1
    }
    try:
        workflow_runner(
            "test.wdl",
            inputs,
            outputs,
            executors=["cromwell-server"]
        )
    finally:
        # Always kill the server, even if the workflow run fails.
        p.kill()
def _get_workflow_imports(self, imports_file: Optional[Path] = None) -> Path:
    """
    Creates a ZIP file with all WDL files to be imported.

    Args:
        imports_file: Text file naming import directories/files - one per line.

    Returns:
        Path to the ZIP file, or None if there is nothing to import.

    Raises:
        Exception: if the `zip` command fails.
    """
    write_imports = bool(self._import_dirs)
    imports_path = None
    if imports_file:
        imports_path = ensure_path(imports_file)
        # An imports file that already exists is reused as-is.
        if imports_path.exists():
            write_imports = False
    if write_imports and self._import_dirs:
        # Collect every WDL file from all import directories.
        imports = [
            wdl
            for path in self._import_dirs
            for wdl in glob.glob(str(path / "*.wdl"))
        ]
        if imports:
            if imports_path:
                ensure_path(imports_path, is_file=True, create=True)
            else:
                # BUG FIX: tempfile.mkstemp() returns an open file descriptor
                # that the original code never closed, leaking one fd per
                # call. NamedTemporaryFile closes its fd on context exit;
                # delete=False keeps the file for `zip` to write into.
                with tempfile.NamedTemporaryFile(
                    suffix=".zip", delete=False
                ) as tmp:
                    imports_path = Path(tmp.name)
            imports_str = " ".join(imports)
            LOG.info(
                f"Writing imports {imports_str} to zip file {imports_path}"
            )
            exe = subby.run(
                f"zip -j - {imports_str}",
                mode=bytes,
                stdout=imports_path,
                raise_on_error=False,
            )
            if not exe.ok:
                raise Exception(
                    f"Error creating imports zip file; stdout={exe.output}; "
                    f"stderr={exe.error}")
    return imports_path
def test_shell():
    """Shell builtins only work when a shell is requested explicitly."""
    # 'exit' is a shell builtin, so without a shell the executable lookup
    # fails with FileNotFoundError.
    with pytest.raises(FileNotFoundError):
        subby.run("exit 2")
    # With an explicit shell path, the builtin runs and its non-zero status
    # propagates as CalledProcessError.
    with pytest.raises(subprocess.CalledProcessError) as exc_info:
        subby.run("exit 2", shell="/bin/sh")
    assert exc_info.value.returncode == 2
    # shell=True uses the default shell; same outcome.
    with pytest.raises(subprocess.CalledProcessError) as exc_info:
        subby.run("exit 2", shell=True)
    assert exc_info.value.returncode == 2
def make_comparable(inpath, output):
    """Project the columns of interest from `inpath` into `output`."""
    # NOTE: `columns` is a free variable captured from the enclosing scope.
    command = f"cat {inpath} | cut -f {columns}"
    subby.run(command, stdout=output)
def run_workflow(self, wdl_path: Path, inputs: Optional[dict] = None,
                 expected: Optional[dict] = None, **kwargs) -> dict:
    """
    Run a WDL workflow on given inputs, and check that the output matches
    given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to
            Cromwell as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: The name of the workflow in the WDL script.
                If None, the name of the WDL script is used (without the
                .wdl extension).
            * inputs_file: Path to the Cromwell inputs file to use. Inputs
                are written to this file only if it doesn't exist.
            * imports_file: Path to the WDL imports file to use. Imports are
                written to this file only if it doesn't exist.
            * java_args: Additional arguments to pass to Java runtime.
            * cromwell_args: Additional arguments to pass to `cromwell run`.

    Returns:
        Dict of outputs.

    Raises:
        ExecutionFailedError: if there was an error executing Cromwell
        AssertionError: if the actual outputs don't match the expected
            outputs
    """
    # Resolve the workflow name; Cromwell can only run whole workflows.
    target, is_task = get_target_name(wdl_path=wdl_path,
                                      import_dirs=self._import_dirs,
                                      **kwargs)
    if is_task:
        raise ValueError(
            "Cromwell cannot execute tasks independently of a workflow")
    inputs_dict, inputs_file = read_write_inputs(inputs_dict=inputs,
                                                 namespace=target)
    imports_file = self._get_workflow_imports(kwargs.get("imports_file"))
    # Command-line fragments are empty strings when the corresponding file
    # is absent, so the final command stays well-formed either way.
    inputs_arg = f"-i {inputs_file}" if inputs_file else ""
    imports_zip_arg = f"-p {imports_file}" if imports_file else ""
    java_args = kwargs.get("java_args", self.java_args) or ""
    cromwell_args = kwargs.get("cromwell_args", self._cromwell_args) or ""
    # Cromwell writes run metadata (status, outputs, failures) here.
    metadata_file = Path.cwd() / "metadata.json"
    cmd = (
        f"{self.java_bin} {java_args} -jar {self._cromwell_jar_file} run "
        f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
        f"{wdl_path}")
    LOG.info(f"Executing cromwell command '{cmd}' with inputs "
             f"{json.dumps(inputs_dict, default=str)}")
    # Don't raise on a non-zero exit; failure details are extracted from the
    # metadata file below and surfaced as ExecutionFailedError.
    exe = subby.run(cmd, raise_on_error=False)
    metadata = None
    if metadata_file.exists():
        with open(metadata_file, "rt") as inp:
            metadata = json.load(inp)
    if exe.ok:
        if metadata:
            assert metadata["status"] == "Succeeded"
            outputs = metadata["outputs"]
        else:
            # Fall back to parsing outputs from Cromwell's stdout when no
            # metadata file was produced.
            LOG.warning(
                f"Cromwell command completed successfully but did not generate "
                f"a metadata file at {metadata_file}")
            outputs = self._get_cromwell_outputs(exe.output)
    else:
        # Build up the error details for ExecutionFailedError.
        error_kwargs = {
            "executor": "cromwell",
            "target": target,
            "status": "Failed",
            "inputs": inputs_dict,
            "executor_stdout": exe.output,
            "executor_stderr": exe.error,
        }
        if metadata:
            failures = self._get_failures(metadata)
            if failures:
                error_kwargs.update({
                    "failed_task": failures.failed_task,
                    "failed_task_exit_status": failures.failed_task_exit_status,
                    "failed_task_stdout": failures.failed_task_stdout,
                    "failed_task_stderr": failures.failed_task_stderr
                })
                if failures.num_failed > 1:
                    error_kwargs["msg"] = \
                        f"cromwell failed on {failures.num_failed} instances of " \
                        f"{failures.failed_task} of {target}; only " \
                        f"showing output from the first failed task"
                else:
                    error_kwargs[
                        "msg"] = f"cromwell failed on workflow {target}"
        else:
            error_kwargs["msg"] = \
                f"Cromwell command failed but did not generate a metadata " \
                f"file at {metadata_file}"
        raise ExecutionFailedError(**error_kwargs)
    if expected:
        self._validate_outputs(outputs, expected, target)
    return outputs
def test_get_all_stderr(mode, expected, expected_0): # This command should write to stderr of the second and # third commands, and stdout of the third command p = subby.run("echo -n hi | tee /dev/stderr | tee /dev/stderr", mode=mode) assert p.output == expected assert p.get_all_stderr() == [expected_0, expected, expected]