def in_archive(self, archive_path, file_path):
    """Report whether `file_path` is listed in the 7z archive at `archive_path`.

    Returns False right away when the archive itself does not exist;
    otherwise asks 7z to list just the requested path and scans the
    listing output for it.
    """
    if not archive_path.exists():
        # without an archive there is nothing that could contain the file
        return False
    from datalad.cmd import Runner
    target = str(file_path)
    # ask 7z only about the specific object location, which keeps the
    # output lean even for big archives
    stdout, _ = Runner()(
        ['7z', 'l', str(archive_path), target],
        log_stdout=True,
    )
    return target in stdout
def test_install_dataladri(src, topurl, path):
    """Install a dataset addressed via a datalad ///-style resource identifier."""
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    # NOTE(review): assumes the fixture pre-created 'test.txt' with content
    # 'some' in the repo -- confirm against the decorator setup
    gr.add('test.txt')
    gr.commit('demo')
    # make the repo servable over "dumb" http
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    # redirect the ///-prefix resolution to the locally served topurl
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_is_installed(src, path):
    """is_installed() must track clone creation and submodule initialization."""
    ds = Dataset(path)
    assert_false(ds.is_installed())
    # get a clone:
    AnnexRepo(path, src)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())
    # get the submodule
    from datalad.cmd import Runner
    Runner().run(['git', 'submodule', 'update', '--init', 'sub1'],
                 cwd=path)
    ok_(subds.is_installed())
def test_replace_and_relative_sshpath(src_path, dst_path):
    """create_sibling/publish over ssh using a path relative to the remote HOME."""
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits outs IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # replacing the previously configured sibling must succeed
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    # we publish just git
    published3 = ds.publish(to='localhost', transfer_data='none')
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    # exactly one new hook-generated log file must have appeared
    eq_(len(logs_post), len(logs_prior) + 1)
def test_clone_dataladri(src, topurl, path):
    """Clone a dataset addressed via a datalad ///-style resource identifier."""
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    # NOTE(review): assumes the fixture pre-created 'test.txt' with content
    # 'some' -- confirm against the decorator setup
    gr.add('test.txt')
    gr.commit('demo')
    # make the repo servable over "dumb" http
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    # redirect the ///-prefix resolution to the locally served topurl
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path,
                   result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def populate(self): super(SubmoduleDataset, self).populate() # add submodules annex = BasicAnnexTestRepo() annex.create() from datalad.cmd import Runner runner = Runner() kw = dict(cwd=self.path, expect_stderr=True) runner.run(['git', 'submodule', 'add', annex.url, 'sub1'], **kw) runner.run(['git', 'submodule', 'add', annex.url, 'sub2'], **kw) runner.run(['git', 'commit', '-m', 'Added sub1 and sub2.'], **kw) runner.run(['git', 'submodule', 'update', '--init', '--recursive'], **kw) # init annex in subdatasets for s in ('sub1', 'sub2'): runner.run(['git', 'annex', 'init'], cwd=opj(self.path, s), expect_stderr=True)
def test_create_sub_dataset_dot_no_path(path):
    """create(dataset=".") from inside a subdirectory makes a new subdataset there."""
    ds = Dataset(path)
    ds.create()

    # Test non-bound call.
    sub0_path = str(ds.pathobj / "sub0")
    os.mkdir(sub0_path)
    with chpwd(sub0_path):
        subds0 = create(dataset=".")
    # the new subdataset is not yet registered in the parent
    assert_repo_status(ds.path, untracked=[subds0.path])
    assert_repo_status(subds0.path)

    # Test command-line invocation directly (regression from gh-3484).
    sub1_path = str(ds.pathobj / "sub1")
    os.mkdir(sub1_path)
    Runner(cwd=sub1_path).run(["datalad", "create", "-d."])
    assert_repo_status(ds.path, untracked=[subds0.path, sub1_path])
def _test_guess_dot_git(annex, path, url, tdir):
    """Install from an http URL works only once the repo is made http-servable."""
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    # failed install must not leave a target directory behind
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)
def __init__(self, dataset=None, dataset_only=False):
    """Initialize the configuration store.

    Parameters
    ----------
    dataset : Dataset, optional
      If given, git config calls are executed with the dataset's path as
      working directory, so dataset-level configuration is picked up.
    dataset_only : bool
      Whether to restrict attention to dataset-level configuration.
    """
    # store in a simple dict
    # no subclassing, because we want to be largely read-only, and implement
    # config writing separately
    self._store = {}
    self._dataset = dataset
    self._dataset_only = dataset_only
    # Since configs could contain sensitive information, to prevent
    # any "facilitated" leakage -- just disable logging of outputs for
    # this runner
    run_kwargs = dict(log_outputs=False)
    if dataset is not None:
        # make sure we run the git config calls in the dataset
        # to pick up the right config files
        run_kwargs['cwd'] = dataset.path
    self._runner = Runner(**run_kwargs)
    self.reload()
def test_GitRepo_get_indexed_files(src, path):
    """get_indexed_files() must agree with `git ls-files` in both directions."""
    repo = GitRepo.clone(src, path)
    indexed = repo.get_indexed_files()
    # obtain the ground truth directly from git
    stdout = Runner()(['git', 'ls-files'], cwd=path)[0]
    ls_files = [line for line in stdout.split('\n') if line]
    # every indexed entry must be known to git ...
    for entry in indexed:
        assert_in(
            entry,
            ls_files,
            "%s not found in output of git ls-files in %s" % (entry, path))
    # ... and every entry git knows about must be reported
    for entry in ls_files:
        assert_in(
            entry,
            indexed,
            "%s not found in output of get_indexed_files in %s" % (entry, path))
def test_guess_dot_git(path=None, url=None, tdir=None, *, annex):
    """Install from an http URL works only once the repo is made http-servable."""
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', git=not annex)
    repo.commit()

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    # failed install must not leave a target directory behind
    ok_(not exists(tdir))

    Runner(cwd=path).run(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(installed.pathobj.resolve(), Path(tdir).resolve())
    ok_(exists(tdir))
    assert_repo_status(tdir, annex=annex)
def test_quoting(path): ds = Dataset(op.join(path, OBSCURE_FILENAME)).create(force=True) # Our custom procedure fails if it receives anything other than two # procedure arguments (so the script itself receives 3). Check a few cases # from the Python API and CLI. ds.config.add("datalad.locations.dataset-procedures", "code", where="dataset") with swallow_outputs(): ds.run_procedure(spec=["just2args", "with ' sing", 'with " doub']) with assert_raises(CommandError): ds.run_procedure(spec=["just2args", "still-one arg"]) runner = Runner(cwd=ds.path) runner.run( "datalad run-procedure just2args \"with ' sing\" 'with \" doub'") with assert_raises(CommandError): runner.run("datalad run-procedure just2args 'still-one arg'")
def test_create(path):
    """create() must honor description and custom git init options."""
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    # shared='world' is recorded by git as core.sharedrepository=2
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check datset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
def test_runner_log_stderr():
    """stderr must be logged with log_stderr=True and passed through otherwise."""
    # TODO: no idea of how to check correct logging via any kind of assertion yet.
    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    # non-dry runner must not accumulate anything in the command buffer
    assert_equal(
        runner.commands, [],
        "Run of: %s resulted in non-empty buffer: %s"
        % (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            # message must show up on the real stderr stream ...
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            # ... and not in the log
            eq_(cml.out, "")
    assert_equal(
        runner.commands, [],
        "Run of: %s resulted in non-empty buffer: %s"
        % (cmd, runner.commands.__str__()))
def test_runner_parametrized_protocol():
    """Keyword arguments of Runner.run() must be forwarded to the protocol constructor."""
    runner = Runner()

    class ProtocolInt(StdOutCapture):
        """Protocol that replaces any received stdout data with a fixed value."""

        def __init__(self, value):
            self.value = value
            super().__init__()

        def pipe_data_received(self, fd, data):
            # ignore the actual data, forward the configured value instead
            super().pipe_data_received(fd, self.value)

    result = runner.run(
        py2cmd('print(1)'),
        protocol=ProtocolInt,
        # value passed to protocol constructor
        value=b'5',
    )
    eq_(result['stdout'], '5')
def __init__(self, path, url=None, runner=None, direct=False):
    """Creates representation of git-annex repository at `path`.

    AnnexRepo is initialized by giving a path to the annex.
    If no annex exists at that location, a new one is created.
    Optionally give url to clone from.

    Parameters
    ----------
    path: str
      path to git-annex repository. In case it's not an absolute path, it's
      relative to os.getcwd()
    url: str
      url to the to-be-cloned repository. Requires valid git url according to
      http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS .
    runner: Runner
      Provide a Runner in case AnnexRepo shall not create its own.
      This is especially needed in case of desired dry runs.
    direct: bool
      If True, force git-annex to use direct mode
    """
    super(AnnexRepo, self).__init__(path, url)

    self.cmd_call_wrapper = runner or Runner()
    # TODO: Concept of when to set to "dry". Includes: What to do in gitrepo class?
    # Now: setting "dry" means to give a dry-runner to constructor.
    # => Do it similar in gitrepo/dataset. Still we need a concept of when to set it
    # and whether this should be a single instance collecting everything or more
    # fine grained.

    # Check whether an annex already exists at destination
    if not exists(opj(self.path, '.git', 'annex')):
        lgr.debug('No annex found in %s. Creating a new one ...' % self.path)
        self._annex_init()

    # only force direct mode; don't force indirect mode
    if direct and not self.is_direct_mode():
        self.set_direct_mode()
def test_replace_and_relative_sshpath(src_path, dst_path):
    """create_sibling/publish with an ssh path relative to the remote home."""
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    dst_relpath = os.path.relpath(dst_path, os.path.expanduser('~'))
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url)
    published = ds.publish('.', to='localhost')
    assert_in('sub.dat', published[0])
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits outs IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    assert_raises(Exception, ds.create_sibling, url)
    # replacing the previously configured sibling must succeed
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish('.', to='localhost')
    assert_in('sub.dat', published2[0])

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    # we publish just git
    published3 = ds.publish(to='localhost')
    assert_not_in('sub2.dat', published3[0])
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish('.', to='localhost')
    assert_in('sub2.dat', published4[0])
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    # exactly one new hook-generated log file must have appeared
    eq_(len(logs_post), len(logs_prior) + 1)
def test_runner_stdin(path):
    """Runner.run() must accept stdin both as an open file and as raw bytes."""
    runner = Runner()
    stdin_file = Path(path) / 'io'
    # exercise content that is known to be hard to handle
    stdin_file.write_text(OBSCURE_FILENAME)
    # feed stdin from an actual file object
    result = runner.run(
        py2cmd('import fileinput; print(fileinput.input().readline())'),
        stdin=stdin_file.open(),
        protocol=StdOutCapture,
    )
    assert_in(OBSCURE_FILENAME, result['stdout'])

    # we can do the same without a tempfile, too: pass the bytes directly
    result = runner.run(
        py2cmd('import fileinput; print(fileinput.input().readline())'),
        stdin=OBSCURE_FILENAME.encode('utf-8'),
        protocol=StdOutCapture,
    )
    assert_in(OBSCURE_FILENAME, result['stdout'])
def test_create(path):
    """create() must honor description, native metadata types, and shared access."""
    ds = Dataset(path)
    ds.create(description="funny",
              native_metadata_type=['bim', 'bam', 'bum'],
              shared_access='world')
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    # shared_access='world' is recorded by git as core.sharedrepository=2
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git-annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check datset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
    assert_equal(ds.config.get_value('datalad.metadata', 'nativetype'),
                 ('bim', 'bam', 'bum'))
def test_script_shims():
    """Installed entry-point scripts must be real datalad scripts of the right version.

    Verifies that each console script resolves on PATH, contains no
    easy-install shim code (for non-editable installs), and reports a
    version matching the installed datalad package.
    """
    runner = Runner()
    # The EASY-INSTALL checks below aren't valid for editable installs. Use the
    # existence of setup.py as an indication that install is _probably_
    # editable. The file should always exist for editable installs, but it can
    # also exist for non-editable installs when the tests are being executed
    # from the top of the source tree.
    setup_exists = (Path(datalad.__file__).parent.parent / "setup.py").exists()
    for script in [
        'datalad',
        'git-annex-remote-datalad-archives',
        'git-annex-remote-datalad'
    ]:
        if not on_windows:
            # those must be available for execution, and should not contain
            which = runner.run(['which', script],
                               protocol=StdOutErrCapture)['stdout']
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            # shutil.which is the supported stdlib replacement for
            # distutils.spawn.find_executable, which was deprecated by
            # PEP 632 and removed in Python 3.12
            from shutil import which as find_executable
            content = find_executable(script)
        if not setup_exists:
            assert_not_in('EASY', content)  # NOTHING easy should be there
            assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out = runner.run([script, '--version'], protocol=StdOutErrCapture)
        version = out['stdout'].rstrip()
        mod, version = version.split(' ', 1)
        assert_equal(mod, 'datalad')
        # we can get git and non git .dev version... so for now
        # relax
        get_numeric_portion = lambda v: [
            x for x in v.split('.') if x.isdigit()
        ]  # extract numeric portion
        assert get_numeric_portion(version)  # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))
def test_runner(tempfile):
    """A non-dry Runner executes commands/functions and buffers nothing."""
    # test non-dry command call
    runner = Runner(dry=False)
    cmd = 'echo Testing real run > %s' % tempfile
    ret = runner.run(cmd)
    # nothing must be recorded in the dry-run command buffer
    assert_equal(
        runner.commands, [],
        "Run of: %s resulted in non-empty buffer: %s"
        % (cmd, runner.commands.__str__()))
    # the shell redirection must have actually created the file
    assert_true(
        os.path.exists(tempfile),
        "Run of: %s resulted with non-existing file %s" % (cmd, tempfile))

    # test non-dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_equal(
        os.path.join('foo', 'bar'), output,
        "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_equal(
        runner.commands.__str__().find('os.path.join'), -1,
        "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s"
        % runner.commands.__str__())
def populate(self):
    """Build a dataset containing a subdataset that itself contains a subdataset."""
    super(NestedDataset, self).populate()
    ds = SubmoduleDataset()
    ds.create()
    from datalad.cmd import Runner
    runner = Runner()
    # git prints progress/info on stderr; that is expected, not an error
    kw = dict(expect_stderr=True)
    runner.run(['git', 'submodule', 'add', ds.url, 'subdataset'],
               cwd=self.path, **kw)
    runner.run(['git', 'submodule', 'add', ds.url, 'subsubdataset'],
               cwd=opj(self.path, 'subdataset'), **kw)
    runner.run(['git', 'commit', '-m', 'Added subdataset.'],
               cwd=opj(self.path, 'subdataset'), **kw)
    runner.run(['git', 'commit', '-a', '-m', 'Added subdatasets.'],
               cwd=self.path, **kw)
    runner.run(['git', 'submodule', 'update', '--init', '--recursive'],
               cwd=self.path, **kw)
    # init all annexes
    for s in ('', 'subdataset', opj('subdataset', 'subsubdataset')):
        runner.run(['git', 'annex', 'init'],
                   cwd=opj(self.path, s), expect_stderr=True)
def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly
    calling 7z.

    Parameters
    ----------
    archive: str
      Path of the archive file to extract.
    dir_: str
      Extraction target; 7z is executed with this as working directory.
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    suffixes = _normalize_fname_suffixes(apath.suffixes)
    if len(suffixes) > 1 and suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd, protocol=KillOutput)
def test_script_shims(script):
    """Entry-point `script` must resolve on PATH and report a matching datalad version."""
    runner = Runner()
    if not on_windows:
        from shutil import which
        # previously the result of which() was discarded, so the
        # availability check verified nothing -- assert it
        assert which(script), f"{script} not found on PATH"

    # and let's check that it is our script
    out = runner.run([script, '--version'], protocol=StdOutErrCapture)
    version = out['stdout'].rstrip()
    mod, version = version.split(' ', 1)
    assert_equal(mod, 'datalad')
    # we can get git and non git .dev version... so for now
    # relax
    get_numeric_portion = lambda v: [
        x for x in re.split('[+.]', v) if x.isdigit()
    ]  # extract numeric portion
    assert get_numeric_portion(
        version), f"Got no numeric portion from {version}"
    assert_equal(get_numeric_portion(__version__),
                 get_numeric_portion(version))
def test_asyncio_loop_noninterference1(path1, path2):
    """Datalad operations must survive a pre-existing and even closed asyncio loop."""
    if on_windows and sys.version_info < (3, 8):
        raise SkipTest(
            "get_event_loop() raises "
            "RuntimeError: There is no current event loop in thread 'MainThread'.")
    # minimalistic use case provided by Dorota
    import datalad.api as dl
    src = dl.create(path1)
    reproducer = src.pathobj / "reproducer.py"
    # the script exercises datalad after the caller has touched (and later
    # closed) the default asyncio event loop
    reproducer.write_text(f"""\
import asyncio
asyncio.get_event_loop()
import datalad.api as datalad
ds = datalad.clone(path=r'{path2}', source=r"{path1}")
loop = asyncio.get_event_loop()
assert loop
# simulate outside process closing the loop
loop.close()
# and us still doing ok
ds.status()
""")
    # if Error -- the test failed
    Runner().run([sys.executable, str(reproducer)])
def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly
    calling 7z.

    Parameters
    ----------
    archive: str
      Path of the archive file to extract.
    dir_: str
      Extraction target; 7z is executed with this as working directory.
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        # hangs somehow, do via single string arg
        #cmd = ['7z', 'x', archive, '-so', '|', '7z', 'x', '-si', '-ttar']
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd)
def test_asyncio_forked(temp):
    """Runner must keep working in both parent and child after os.fork()."""
    # temp will be used to communicate from child either it succeeded or not
    temp = Path(temp)
    runner = Runner()
    import os
    try:
        pid = os.fork()
    except BaseException as exc:
        # .fork availability is "Unix", and there are cases where it is "not supported"
        # so we will just skip if no forking is possible
        raise SkipTest(f"Cannot fork: {exc}")
    # if does not fail (in original or in a fork) -- we are good
    if sys.version_info < (3, 8) and pid != 0:
        # for some reason it is crucial to sleep a little (but 0.001 is not enough)
        # in the master process with older pythons or it takes forever to make the child run
        sleep(0.1)
    try:
        runner.run([sys.executable, '--version'], protocol=StdOutCapture)
        if pid == 0:
            temp.write_text("I rule")
    # NOTE(review): bare except deliberately catches everything so the child
    # can still report failure through the file
    except:
        if pid == 0:
            temp.write_text("I suck")
    if pid != 0:
        # parent: look after the child
        t0 = time()
        try:
            # NOTE(review): busy-wait without sleep; tolerable for a
            # short-lived test, bounded by the 5s timeout below
            while not temp.exists() or temp.stat().st_size < 6:
                if time() - t0 > 5:
                    raise AssertionError(
                        "Child process did not create a file we expected!")
        finally:
            # kill the child
            os.kill(pid, signal.SIGTERM)
        # see if it was a good one
        eq_(temp.read_text(), "I rule")
    else:
        # sleep enough so parent just kills me the kid before I continue doing bad deeds
        sleep(10)
def test_external_modification(path=None):
    """Config reload must pick up out-of-band `git config` modifications."""
    from datalad.cmd import WitlessRunner as Runner
    runner = Runner(cwd=path)
    repo = GitRepo(path, create=True)
    config = repo.config
    key = 'sec.sub.key'
    assert_not_in(key, config)
    config.set(key, '1', scope='local')
    assert_equal(config[key], '1')
    # we pick up the case where we modified so size changed
    runner.run(['git', 'config', '--local', '--replace-all', key, '10'])
    # unfortunately we do not react for .get unless reload. But here
    # we will test if reload correctly decides to reload without force
    config.reload()
    assert_equal(config[key], '10')
    # and no size change ('11' is as long as '10')
    runner.run(['git', 'config', '--local', '--replace-all', key, '11'])
    config.reload()
    assert_equal(config[key], '11')
def test_custom_runner_protocol(path):
    """A runner with a non-default protocol must be wired into GitRepo correctly."""
    # Check that a runner with a non-default protocol gets wired up correctly.
    prot = ExecutionTimeProtocol()
    gr = GitRepo(path, runner=Runner(cwd=path, protocol=prot), create=True)
    # repo creation itself must already have recorded executions
    ok_(len(prot) > 0)
    ok_(prot[0]['duration'] >= 0)

    def check(prev_len, prot, command):
        # Check that the list grew and has the expected command without
        # assuming that it gained _only_ a one command.
        ok_(len(prot) > prev_len)
        assert_in(command, sum([p["command"] for p in prot[prev_len:]], []))

    prev_len = len(prot)
    gr.add("foo")
    check(prev_len, prot, "add")

    prev_len = len(prot)
    gr.commit("commit foo")
    check(prev_len, prot, "commit")

    # every recorded execution must carry a non-negative duration
    ok_(all(p['duration'] >= 0 for p in prot))
def test_runner_dry(tempfile): runner = Runner(dry=True) # test dry command call cmd = 'echo Testing dry run > %s' % tempfile ret = runner.run(cmd) assert_equal(("DRY", "DRY"), ret, "Dry run of: %s resulted in output %s" % (cmd, ret)) assert_greater( runner.commands.__str__().find('echo Testing dry run'), -1, "Dry run of: %s resulted in buffer: %s" % (cmd, runner.commands.__str__())) assert_false(os.path.exists(tempfile)) # test dry python function call output = runner.call(os.path.join, 'foo', 'bar') assert_is(None, output, "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output) assert_greater( runner.commands.__str__().find('join'), -1, "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" % runner.commands.__str__())