def compress_files(files, archive, path=None, overwrite=True):
    """Compress `files` into an `archive` file

    Parameters
    ----------
    files : list of str
      Files to add to the archive.
    archive : str
      Target archive file. When its second-to-last (normalized) suffix is
      `.tar`, the files are first streamed into a tar by one 7z call and
      that stream is piped into a second 7z call writing the archive.
    path : str
      Alternative directory under which compressor will be invoked, to e.g.
      take into account relative paths of files and/or archive
    overwrite : bool
      Whether to allow overwriting the target archive file if one already
      exists

    Raises
    ------
    ValueError
      If the target archive already exists and `overwrite` is false.
    """
    apath = Path(archive)
    # The compressor is invoked with `path` as its working directory, so a
    # relative archive name refers to a file under `path`. Probe (and remove)
    # the file at that location, not one relative to the process' own cwd.
    if path is None or apath.is_absolute():
        target = apath
    else:
        target = Path(path) / apath
    if target.exists():
        if not overwrite:
            raise ValueError(
                'Target archive {} already exists and overwrite is forbidden'.
                format(target))
        target.unlink()

    runner = Runner(cwd=path)
    suffixes = _normalize_fname_suffixes(apath.suffixes)
    if len(suffixes) > 1 and suffixes[-2] == '.tar':
        # shell pipeline: tar stream on stdout of the first call is consumed
        # on stdin by the second one
        cmd = '7z u .tar -so -- {} | 7z u -si -- {}'.format(
            join_cmdline(files),
            quote_cmdlinearg(str(apath)),
        )
    else:
        # list() so that any sequence of file names is accepted
        cmd = ['7z', 'u', str(apath), '--'] + list(files)
    runner.run(cmd, protocol=KillOutput)
def get_7z(self):
    """Probe whether `7z` can be executed.

    Returns
    -------
    bool
      True if running `7z` completed without error, False if the attempt
      raised FileNotFoundError or CommandError.
    """
    from datalad.cmd import CommandError, StdOutErrCapture, WitlessRunner

    # TODO: To not rely on availability in PATH we might want to use `which`
    #       (`where` on windows) and get the actual path to 7z to re-use in
    #       in_archive() and get().
    #       Note: `command -v XXX` or `type` might be cross-platform
    #       solution!
    #       However, for availability probing only, it would be sufficient
    #       to just call 7z and see whether it returns zero.
    probe = WitlessRunner()
    try:
        probe.run('7z', protocol=StdOutErrCapture)
    except (FileNotFoundError, CommandError):
        return False
    return True
def test_custom_call_fmt(path, local_file):
    """Check expansion of `call_fmt` placeholders for different working dirs.

    The container is registered in a subdataset and then invoked from the
    subdataset, the superdataset, and a plain subdirectory of the
    superdataset.
    """
    ds = Dataset(path).create()
    subds = ds.create('sub')

    # plug in a proper singularity image
    image_url = get_local_file_url(op.join(local_file, 'some_container.img'))
    subds.containers_add(
        'mycontainer',
        url=image_url,
        image='righthere',
        call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} '
                 # and environment variable being set/propagated by default
                 'name=$DATALAD_CONTAINER_NAME')
    ds.save()  # record the effect in super-dataset

    def run_stdout(cwd, container):
        # invoke `datalad containers-run` in `cwd` and hand back its stdout
        res = WitlessRunner(cwd=cwd).run(
            ['datalad', 'containers-run', '-n', container, 'XXX'],
            protocol=StdOutCapture)
        return res['stdout']

    # Running should work fine either within sub or within super
    assert_in('image=righthere cmd=XXX img_dspath=. name=mycontainer',
              run_stdout(subds.path, 'mycontainer'))
    assert_in('image=sub/righthere cmd=XXX img_dspath=sub',
              run_stdout(ds.path, 'sub/mycontainer'))

    # Test within subdirectory of the super-dataset
    subdir = op.join(ds.path, 'subdir')
    os.mkdir(subdir)
    assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub',
              run_stdout(subdir, 'sub/mycontainer'))
def test_runner_failure(dir_):
    """A command exiting non-zero must raise CommandError carrying that code."""
    failing_cmd = py2cmd('import sys; sys.exit(53)')
    with assert_raises(CommandError) as cme:
        Runner().run(failing_cmd)
    eq_(53, cme.exception.code)
def _execute_command(command, pwd, expected_exit=None):
    """Run `command` in directory `pwd` and report how it exited.

    Parameters
    ----------
    command : str
      Command to execute (always a string).
    pwd : str
      Working directory for the runner.
    expected_exit : int, optional
      If given and the command fails with a different exit code, the
      CommandError is re-raised instead of being returned.

    Returns
    -------
    tuple
      `(exit_code, exception)`: exit code is 0 when no CommandError was
      raised; exception is the caught CommandError or None.
    """
    from datalad.cmd import WitlessRunner

    failure = None
    exit_code = None
    try:
        lgr.info("== Command start (output follows) =====")
        # command is always a string
        WitlessRunner(cwd=pwd).run(command)
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to exitcode in the same way
        failure = e
        exit_code = e.code
        if expected_exit is not None and exit_code != expected_exit:
            # we failed in a different way during a rerun. This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on
            # failure we know that we started clean, so we could easily go
            # back, needs gh-1424 to be able to do it recursively
            raise
    lgr.info("== Command exit (modification check follows) =====")
    return exit_code or 0, failure
def test_cfg_passthrough(path=None):
    """Options given via `datalad -c` must end up in the created dataset's config."""
    tuned = ('annex.tune.objecthash1', 'annex.tune.objecthashlower')
    Runner().run([
        'datalad',
        '-c', 'annex.tune.objecthash1=true',
        '-c', 'annex.tune.objecthashlower=true',
        'create', path,
    ])
    ds = Dataset(path)
    for option in tuned:
        eq_(ds.config.get(option, None), 'true')
def test_runner_stdout_capture():
    """Output printed to stdout is captured; nothing shows up as stderr."""
    test_msg = "stdout-Message"
    cmd = py2cmd('import sys; print(%r, file=sys.stdout)' % test_msg)
    captured = Runner().run(cmd, protocol=StdOutErrCapture)
    eq_(captured['stdout'].rstrip(), test_msg)
    ok_(not captured['stderr'])
def test_runner_fix_PWD(path):
    """PWD seen by the child must match the runner's cwd; caller's env stays intact."""
    orig_cwd = os.getcwd()
    env = os.environ.copy()
    env['PWD'] = orig_cwd
    report_pwd = py2cmd('import os; print(os.environ["PWD"])')
    res = Runner(cwd=path, env=env).run(report_pwd, protocol=StdOutCapture)
    # was fixed up to point to cwd's path
    eq_(res['stdout'].strip(), path)
    # no side-effect
    eq_(env['PWD'], orig_cwd)
def test_runner(tempfile):
    """Run a shell redirect without any capture protocol and verify the file written."""
    if on_windows:
        content = 'Testing real run'
    else:
        content = 'Testing äöü東 real run'
    cmd = 'echo {} > {}'.format(content, tempfile)
    res = Runner().run(cmd)
    # no capture of any kind, by default
    for stream in ('stdout', 'stderr'):
        ok_(not res[stream])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)
def test_addurls_stdin_input_command_line(self=None, path=None):
    """Feed `self.json_file` to `datalad addurls -` on stdin and check files appear."""
    # The previous test checks all the cases, but it overrides sys.stdin.
    # Do a simple check that's closer to a command line call.
    Dataset(path).create(force=True)
    addurls_cmd = ["datalad", "addurls", '-', '{url}', '{name}']
    runner = WitlessRunner(cwd=path)
    with open(self.json_file) as jfh:
        runner.run(addurls_cmd, stdin=jfh)
    for fname in ("a", "b", "c"):
        ok_exists(op.join(path, fname))
def check_run_and_get_output(cmd):
    """Run `datalad --help` and return its captured `(stdout, stderr)`.

    NOTE(review): the `cmd` argument is accepted but not used -- the command
    executed is always `datalad --help`; confirm whether that is intended.

    Raises
    ------
    AssertionError
      If the run raises CommandError.
    """
    runner = WitlessRunner()
    # suppress log output happen it was set to high values
    quiet_log_env = {'DATALAD_LOG_LEVEL': 'WARN'}
    try:
        with patch.dict('os.environ', quiet_log_env):
            result = runner.run(["datalad", "--help"],
                                protocol=StdOutErrCapture)
    except CommandError as e:
        captured = (e.stdout, e.stderr)
        raise AssertionError(
            "'datalad --help' failed to start normally. "
            "Exited with %d and output %s" % (e.code, captured))
    return result['stdout'], result['stderr']
def test_no_stdin_swallow(fname=None):
    """Check stdin handling of `datalad sshrun`, with and without `-n`.

    Without `-n` the content of `fname` must reach the remote `cat`; with
    `-n` the command must produce no output. The input file is opened in a
    context manager for each run so the handle is closed afterwards.
    """
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'datalad-test', 'cat']

    with open(fname) as stdin:
        out = WitlessRunner().run(cmd, stdin=stdin, protocol=StdOutCapture)
    assert_equal(out['stdout'].rstrip(), '123magic')

    # test with -n switch now, which we could place even at the end
    with open(fname) as stdin:
        out = WitlessRunner().run(cmd + ['-n'], stdin=stdin,
                                  protocol=StdOutCapture)
    assert_equal(out['stdout'], '')
def test_no_rdflib_loaded():
    """Importing the archives custom remote must not pull in `rdflib`."""
    # rely on rdflib polluting stdout to see that it is not loaded whenever we
    # load this remote since that adds 300ms delay for no immediate use
    probe_code = (
        'import datalad.customremotes.archives, sys; '
        'print([k for k in sys.modules if k.startswith("rdflib")])'
    )
    out = WitlessRunner().run(
        [sys.executable, '-c', probe_code],
        protocol=StdOutErrCapture)
    for stream in ('stdout', 'stderr'):
        assert_not_in("rdflib", out[stream])
def test_runner_stdin(path):
    """Content of a file passed as `stdin` must be readable by the child process.

    The file handle is opened in a context manager so it is closed once the
    command has finished.
    """
    runner = Runner()
    fakestdin = Path(path) / 'io'
    # go for difficult content
    fakestdin.write_text(OBSCURE_FILENAME)

    with fakestdin.open() as stdin:
        res = runner.run(
            py2cmd('import fileinput; print(fileinput.input().readline())'),
            stdin=stdin,
            protocol=StdOutCapture,
        )
    assert_in(OBSCURE_FILENAME, res['stdout'])
class witlessrunner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns of WitlessRunner
    """

    def setup(self):
        """Instantiate the two runners used by the timing methods."""
        self.runner = WitlessRunner()
        self.git_runner = GitWitlessRunner()

    def time_echo(self):
        """Time `echo` through the plain runner, default protocol."""
        cmd = ["echo"]
        self.runner.run(cmd)

    def time_echo_gitrunner(self):
        """Time `echo` through the git runner, default protocol."""
        cmd = ["echo"]
        self.git_runner.run(cmd)

    def time_echo_gitrunner_fullcapture(self):
        """Time `echo` through the git runner with stdout/stderr capture."""
        cmd = ["echo"]
        self.git_runner.run(cmd, protocol=StdOutErrCapture)
def _execute_command(command, pwd):
    """Run `command` in directory `pwd` and report how it exited.

    Parameters
    ----------
    command : str
      Command to execute (always a string).
    pwd : str
      Working directory for the runner.

    Returns
    -------
    tuple
      `(exit_code, exception)`: exit code is 0 when no CommandError was
      raised; exception is the caught CommandError or None.
    """
    from datalad.cmd import WitlessRunner

    failure = None
    exit_code = None
    runner = WitlessRunner(cwd=pwd)
    lgr.info("== Command start (output follows) =====")
    try:
        # command is always a string
        runner.run(command)
    except CommandError as e:
        failure, exit_code = e, e.code
    lgr.info("== Command exit (modification check follows) =====")
    return exit_code or 0, failure
def in_archive(self, archive_path, file_path):
    """Report whether `file_path` shows up in the 7z listing of `archive_path`.

    Returns False right away when the archive does not exist. Otherwise the
    check is a substring test of `str(file_path)` against the stdout of
    `7z l`.
    """
    if not archive_path.exists():
        # no archive, not file
        return False

    from datalad.cmd import (
        StdOutErrCapture,
        WitlessRunner,
    )

    loc = str(file_path)
    # query 7z for the specific object location, keeps the output
    # lean, even for big archives
    list_cmd = ['7z', 'l', str(archive_path), loc]
    listing = WitlessRunner().run(list_cmd, protocol=StdOutErrCapture)
    return loc in listing['stdout']
def test_completion(out_fn=None):
    """Exercise command line completion of `datalad` via the argcomplete protocol.

    Completions are written by the child process into `out_fn` and compared
    against the expected entries.
    """
    skip_if_no_module('argcomplete')

    from datalad.cmd import WitlessRunner
    runner = WitlessRunner()

    def get_completions(s: str, expected) -> list:
        """Run 'datalad' external command and collect completions

        Parameters
        ----------
        s: str
          what to append to 'datalad ' invocation
        expected: iterable of str
          What entries to expect - would raise AssertionError if any is not
          present in output

        Returns
        -------
        list of str
          Entries output
        """
        # reuse but ensure it is gone
        if os.path.exists(out_fn):
            os.unlink(out_fn)

        comp_line = f'datalad {s}'
        completion_env = dict(
            os.environ,
            _ARGCOMPLETE='1',
            _ARGCOMPLETE_STDOUT_FILENAME=out_fn,
            COMP_LINE=comp_line,
            # without -1 seems to get "finished completion", someone can
            # investigate more
            COMP_POINT=str(len(comp_line) - 1),  # always at the end ATM
        )
        runner.run(comp_line.split(' '), env=completion_env)

        with open(out_fn, 'rb') as f:
            raw = f.read()
        entries = [chunk.decode() for chunk in raw.split(b'\x0b')]

        missing = set(expected).difference(entries)
        if missing:
            raise AssertionError(
                f"Entries {sorted(missing)} were expected but not found in the completion output: {entries}"
            )
        return entries  # for extra analyzes if so desired

    all_commands = get_all_commands()
    get_completions('i', {'install'})
    get_completions(' ', ['--dbg', '-c'] + all_commands)
    # if command already matches -- we get only that hit ATM, not others which
    # begin with it
    get_completions('create', ['create '])
    get_completions('create -', ['--dataset'])
    # but for incomplete one we do get all create* commands
    create_commands = [c for c in all_commands if c.startswith('create')]
    get_completions('creat', create_commands)
def test_runner_parametrized_protocol():
    """Extra keyword arguments of `run()` must reach the protocol constructor."""

    class FixedValueProtocol(StdOutCapture):
        # protocol returns a given value whatever it receives
        def __init__(self, value):
            self.value = value
            super().__init__()

        def pipe_data_received(self, fd, data):
            # ignore the actual data and record the configured value instead
            super().pipe_data_received(fd, self.value)

    res = Runner().run(
        py2cmd('print(1)'),
        protocol=FixedValueProtocol,
        # value passed to protocol constructor
        value=b'5',
    )
    eq_(res['stdout'], '5')
def setup_class(cls):
    """Record whether the test image is already present; pull it if it is not.

    Raises SkipTest when `docker pull` fails with CommandError.
    """
    cls.image_name = "busybox:latest"
    cls.image_existed = bool(images_exist([cls.image_name]))
    if cls.image_existed:
        return
    try:
        WitlessRunner().run(["docker", "pull", cls.image_name])
    except CommandError:
        # This is probably due to rate limiting.
        raise SkipTest("Plain `docker pull` failed; skipping")
def test_script_shims():
    """Check installed entry-point scripts for shims and a matching version.

    For each script: unless the install looks editable, the script (its file
    content on non-Windows, its located path on Windows) must not mention
    'EASY' or 'pkg_resources'; and `<script> --version` must report `datalad`
    with the same numeric version components as `__version__`.
    """
    # `distutils` was removed from the standard library in Python 3.12;
    # shutil.which is the supported way to locate an executable.
    import shutil

    runner = Runner()
    # The EASY-INSTALL checks below aren't valid for editable installs. Use the
    # existence of setup.py as an indication that install is _probably_
    # editable. The file should always exist for editable installs, but it can
    # also exist for non-editable installs when the tests are being executed
    # from the top of the source tree.
    setup_exists = (Path(datalad.__file__).parent.parent / "setup.py").exists()

    def get_numeric_portion(v):
        # we can get git and non git .dev version... so for now relax:
        # extract numeric portion
        return [x for x in v.split('.') if x.isdigit()]

    for script in [
            'datalad',
            'git-annex-remote-datalad-archives',
            'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain
            which = runner.run(['which', script],
                               protocol=StdOutErrCapture)['stdout']
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            content = shutil.which(script)

        if not setup_exists:
            assert_not_in('EASY', content)  # NOTHING easy should be there
            assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out = runner.run([script, '--version'], protocol=StdOutErrCapture)
        version = out['stdout'].rstrip()
        mod, version = version.split(' ', 1)
        assert_equal(mod, 'datalad')
        assert get_numeric_portion(version)  # that the helper is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))
def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly calling 7z.

    Parameters
    ----------
    archive: str
      Archive to extract.
    dir_: str
      Directory used as working directory of the 7z call(s).
    """
    suffixes = _normalize_fname_suffixes(Path(archive).suffixes)
    is_compressed_tar = len(suffixes) > 1 and suffixes[-2] == '.tar'
    if is_compressed_tar:
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    Runner(cwd=dir_).run(cmd, protocol=KillOutput)
def get_interactive(py_pre="", **run_kwargs):
    """Run `is_interactive()` in a child Python and return what it reported.

    The child writes the result into the file `fout`; `py_pre` is code
    prepended to the child's script and `run_kwargs` are passed on to the
    runner's `run()`.
    """
    # only the probe snippet is %-formatted; `py_pre` is prepended verbatim
    probe_code = (
        'from datalad.utils import is_interactive; '
        'f = open(%r, "w"); '
        'f.write(str(is_interactive())); '
        'f.close()' % fout
    )
    WitlessRunner().run(
        [sys.executable, "-c", py_pre + probe_code],
        **run_kwargs)
    with open(fout) as f:
        reported = f.read()
    assert_in(reported, bools)
    return bool(bools.index(reported))
def test_external_modification(path=None):
    """Config changes made by an external `git config` call are seen after `reload()`."""
    from datalad.cmd import WitlessRunner as Runner

    runner = Runner(cwd=path)
    config = GitRepo(path, create=True).config

    key = 'sec.sub.key'
    assert_not_in(key, config)
    config.set(key, '1', scope='local')
    assert_equal(config[key], '1')

    # First value: we pick up the case where we modified so size changed.
    # Second value: and no size change.
    for external_value in ('10', '11'):
        runner.run(['git', 'config', '--local', '--replace-all', key,
                    external_value])
        # unfortunately we do not react for .get unless reload. But here
        # we will test if reload is correctly decides to reload without force
        config.reload()
        assert_equal(config[key], external_value)
def test_script_shims(script):
    """Check that `script` is our entry point and reports a matching version.

    On non-Windows systems the script must be locatable on PATH. Its
    `--version` output must start with `datalad` and carry the same numeric
    version components as `__version__`.
    """
    runner = Runner()
    if not on_windows:
        from shutil import which

        # the lookup result was previously discarded, so it verified nothing;
        # fail early and clearly if the script cannot be found
        assert which(script), f"{script} not found in PATH"

    def get_numeric_portion(v):
        # we can get git and non git .dev version... so for now relax:
        # extract numeric portion
        return [x for x in re.split('[+.]', v) if x.isdigit()]

    # and let's check that it is our script
    out = runner.run([script, '--version'], protocol=StdOutErrCapture)
    version = out['stdout'].rstrip()
    mod, version = version.split(' ', 1)
    assert_equal(mod, 'datalad')
    assert get_numeric_portion(
        version), f"Got no numeric portion from {version}"
    assert_equal(get_numeric_portion(__version__),
                 get_numeric_portion(version))
def test_asyncio_forked(temp):
    """Run a command via the runner in both a parent and a forked child process.

    The child reports success or failure by writing into `temp`; the parent
    waits for that file, terminates the child and checks the report.
    """
    # temp will be used to communicate from child either it succeeded or not
    temp = Path(temp)
    runner = Runner()
    import os
    try:
        pid = os.fork()
    except BaseException as exc:
        # .fork availability is "Unix", and there are cases where it is
        # "not supported" so we will just skip if no forking is possible
        raise SkipTest(f"Cannot fork: {exc}")
    in_child = pid == 0

    # if does not fail (in original or in a fork) -- we are good
    if not in_child and sys.version_info < (3, 8):
        # for some reason it is crucial to sleep a little (but 0.001 is not
        # enough) in the master process with older pythons or it takes forever
        # to make the child run
        sleep(0.1)

    try:
        runner.run([sys.executable, '--version'], protocol=StdOutCapture)
        if in_child:
            temp.write_text("I rule")
    except BaseException:
        if in_child:
            temp.write_text("I suck")

    if in_child:
        # sleep enough so parent just kills me the kid before I continue doing
        # bad deeds
        sleep(10)
    else:
        # parent: look after the child
        started = time()
        try:
            while not temp.exists() or temp.stat().st_size < 6:
                if time() - started > 5:
                    raise AssertionError(
                        "Child process did not create a file we expected!")
        finally:
            # kill the child
            os.kill(pid, signal.SIGTERM)
        # see if it was a good one
        eq_(temp.read_text(), "I rule")
def test_containers_run(self, path):
    """Run a `dhub://` container via `containers-run`, with and without stdin.

    Skipped when the image already existed before the test class was set up.
    """
    if self.image_existed:
        raise SkipTest(
            "Not pulling with containers-run due to existing image: {}"
            .format(self.image_name))

    from datalad.api import Dataset

    ds = Dataset(path).create(force=True)
    ds.save(path="foo")
    ds.containers_add("bb", url="dhub://" + self.image_name)

    base_cmd = ["datalad", "containers-run", "-n", "bb"]

    res = WitlessRunner(cwd=ds.path).run(
        base_cmd + ["cat foo"],
        protocol=StdOutCapture)
    assert_in("content", res["stdout"])

    # Data can be received on stdin.
    with (ds.pathobj / "foo").open() as ifh:
        res = WitlessRunner(cwd=ds.path).run(
            base_cmd + ["cat"],
            protocol=StdOutCapture,
            stdin=ifh)
    assert_in("content", res["stdout"])
def test_docker(path):
    """Add a container from a `docker://` URL and run a command in it."""
    # Singularity's "docker://" scheme.
    image_url = (
        "docker://busybox@sha256:"
        "7964ad52e396a6e045c39b5a44438424ac52e12e4d5a25d94895f2058cb863a0"
    )
    ds = Dataset(path).create()
    ds.containers_add("bb", url=image_url)

    expected_img = op.join(ds.path, ".datalad", "environments", "bb", "image")
    assert_result_count(ds.containers_list(), 1, path=expected_img, name="bb")
    ok_clean_git(path)

    run_cmd = ["datalad", "containers-run", "ls", "/singularity"]
    WitlessRunner(cwd=ds.path).run(run_cmd, protocol=StdOutCapture)
def test_create(probe, path):
    """Create a dataset with a description and check its config and annex info."""
    # only as a probe whether this FS is a crippled one
    ar = AnnexRepo(probe, create=True)

    if ar.is_managed_branch():
        initopts = None
    else:
        # custom git init option
        initopts = dict(shared='world')

    ds = Dataset(path)
    ds.create(description="funny", initopts=initopts)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    if not ar.is_managed_branch():
        eq_(ds.config.get("core.sharedrepository"), '2')

    # check description in `info`
    info = Runner().run(
        ['git', 'annex', 'info'],
        cwd=path,
        protocol=StdOutErrCapture)
    assert_in('funny [here]', info['stdout'])

    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)
def test_save_and_run(self, path):
    """Save the image into a directory, then run a command from that saved image.

    If the image was not present before the test class was set up, it is
    removed from docker before the run, and is expected to exist again
    afterwards.
    """
    image_dir = op.join(path, "image")
    call(["save", self.image_name, image_dir])
    ok_exists(op.join(image_dir, "manifest.json"))

    img_ids = list_images([self.image_name])
    assert len(img_ids) == 1
    saved_id = "sha256:" + da.get_image(image_dir)
    eq_(saved_id, img_ids[0])

    if not self.image_existed:
        WitlessRunner().run(["docker", "rmi", self.image_name])

    run_result = call(["run", image_dir, "ls"], cwd=path,
                      protocol=StdOutCapture)
    assert images_exist([self.image_name])
    assert_in("image", run_result["stdout"])