def test_runner_log_stdout():
    # TODO: no idea how to check correct logging via any kind of assertion yet.
    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(runner.commands, [],
                     "Run of: %s resulted in non-empty buffer: %s"
                     % (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__
    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'},
        {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
    assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in
    # test_gitrepo). Alternatively: New testrepo (plain git submodules) and
    # have a dedicated test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'sub1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as log level while
            # swallowing ATM, let's check whether an exit code is reported
            # -- test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
def test_globbedpaths(path):
    dotdir = op.curdir + op.sep

    for patterns, expected in [
            (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
            ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
            (["*.txt", "*.dat"],
             {"1.txt", "2.dat", u"bβ.dat", "3.txt"}),
            ([dotdir + "*.txt", "*.dat"],
             {dotdir + "1.txt", "2.dat", u"bβ.dat", dotdir + "3.txt"}),
            (["subdir/*.txt"],
             {"subdir/1.txt", "subdir/2.txt"}),
            ([dotdir + "subdir/*.txt"],
             {dotdir + p for p in ["subdir/1.txt", "subdir/2.txt"]}),
            (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
            (["*.txt"], {"1.txt", "2.txt"}),
            ([dotdir + "*.txt"], {dotdir + p for p in ["1.txt", "2.txt"]}),
            ([pardir + "*.txt"], {pardir + p for p in ["1.txt", "3.txt"]}),
            ([dotdir + pardir + "*.txt"],
             {dotdir + pardir + p for p in ["1.txt", "3.txt"]}),
            (["subdir/"], {"subdir/"})]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", u"bβ.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", u"bβ.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat", u"bβ.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # We sort the glob outputs (within each pattern).
    glob_results = {"z": "z",
                    "a": ["x", "d", "b"]}
    with patch('glob.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat", u"bβ.dat"]),
                             (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        for skipped in [opj(ds.path, 'not_existing'), path_outside]:
            cml.assert_logged(msg="ignored non-existing paths: {}\n".format(
                [opj(ds.path, 'not_existing'), path_outside]),
                regex=False, level='WARNING')
        ok_(isinstance(result, list))
        eq_(len(result), 2)
        for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
            assert_in(sub, result)
            ok_(sub.is_installed())

    # return of get is always a list, even if just one thing was gotten;
    # in this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out a few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples

    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(GitCommandError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        with swallow_logs(new_level=logging.INFO) as cml:
            result = install("///")
            assert_in("was already installed from", cml.out)
            eq_(result, ds)

        # and a third time into an existing something that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with swallow_logs(new_level=logging.WARNING) as cml:
            result = install("sub", source='///')
            assert_in("already exists and is not an installed dataset", cml.out)
            ok_(result is None)
def test_AnnexRepo_instance_from_clone(src, dst):
    ar = AnnexRepo(dst, src)
    assert_is_instance(ar, AnnexRepo, "AnnexRepo was not created.")
    assert_true(os.path.exists(os.path.join(dst, '.git', 'annex')))

    # doing it again should raise GitCommandError since git will notice
    # there's already a git-repo at that path and therefore can't clone to `dst`
    with swallow_logs() as cm:
        assert_raises(GitCommandError, AnnexRepo, dst, src)
        assert("already exists" in cm.out)
def check_filters(name):
    with swallow_logs(new_level=logging.DEBUG, name=name) as cml:
        lgr1 = logging.getLogger(name + '.goodone')
        lgr2 = logging.getLogger(name + '.anotherone')
        lgr3 = logging.getLogger(name + '.bad')
        lgr1.debug('log1')
        lgr2.info('log2')
        lgr3.info('log3')
        assert_in('log1', cml.out)
        assert_in('log2', cml.out)
        assert 'log3' not in cml.out
def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta,
                              {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex branch: one for
            # the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content is
        # the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")
def test_rerun_old_flag_compatibility(path):
    ds = Dataset(path).create()
    ds.run("echo x$(cat grows) > grows")
    # Deprecated `datalad --rerun` still runs the last commit's command.
    ds.run(rerun=True)
    eq_("xx\n", open(opj(path, "grows")).read())
    # Running with --rerun and a command ignores the command.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run(rerun=True, cmd="ignored")
        assert_in("Ignoring provided command in --rerun mode", cml.out)
        eq_("xxx\n", open(opj(path, "grows")).read())
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    direct_mode = ds.repo.is_direct_mode()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
            # let's not speculate that the exit code is always 127
            ok_(cme.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        # When in direct mode, check at the level of save rather than add
        # because the annex files show up as typechanges and adding them won't
        # necessarily have a "notneeded" status.
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty2'), type='file',
                            status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        with swallow_logs(new_level=logging.WARNING) as cml:
            assert_raises(InsufficientArgumentsError, add, path="some")
            assert_in('ignoring non-existent', cml.out)
    ds = Dataset(path)
    ds.create()
    assert_raises(InsufficientArgumentsError, ds.add,
                  opj(pardir, 'path', 'outside'))
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with a dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_link_file_load(tempfile):
    tempfile2 = tempfile + '_'

    with open(tempfile, 'w') as f:
        f.write("LOAD")

    link_file_load(tempfile, tempfile2)  # this should work in general

    ok_(os.path.exists(tempfile2))

    with open(tempfile2, 'r') as f:
        assert_equal(f.read(), "LOAD")

    def inode(fname):
        with open(fname) as fd:
            return os.fstat(fd.fileno()).st_ino

    def stats(fname, times=True):
        """Return stats on the file which should have been preserved"""
        with open(fname) as fd:
            st = os.fstat(fd.fileno())
            stats = (st.st_mode, st.st_uid, st.st_gid, st.st_size)
            if times:
                return stats + (st.st_atime, st.st_mtime)
            else:
                return stats
            # despite copystat mtime is not copied. TODO
            #        st.st_mtime)

    if on_linux or on_osx:
        # above call should result in the hardlink
        assert_equal(inode(tempfile), inode(tempfile2))
        assert_equal(stats(tempfile), stats(tempfile2))

        # and if we mock absence of .link
        class raise_AttributeError:
            def __call__(*args):
                raise AttributeError("TEST")

        with patch('os.link', new_callable=raise_AttributeError):
            with swallow_logs(logging.WARNING) as cm:
                link_file_load(tempfile, tempfile2)  # should still work
                ok_("failed (TEST), copying file" in cm.out)

    # should be a copy (either originally for windows, or after mocked call)
    ok_(inode(tempfile) != inode(tempfile2))
    with open(tempfile2, 'r') as f:
        assert_equal(f.read(), "LOAD")
    assert_equal(stats(tempfile, times=False),
                 stats(tempfile2, times=False))
    os.unlink(tempfile2)  # TODO: next two with_tempfile
def test_GitRepo_instance_from_clone(src, dst):
    gr = GitRepo.clone(src, dst)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    assert_is_instance(gr.repo, gitpy.Repo,
                       "Failed to instantiate GitPython Repo object.")
    ok_(op.exists(op.join(dst, '.git')))

    # doing it again should raise GitCommandError since git will notice
    # there's already a git-repo at that path and therefore can't clone to `dst`
    # Note: Since GitRepo is now a WeakSingletonRepo, this is prevented from
    # happening atm. Disabling for now:
    # raise SkipTest("Disabled for RF: WeakSingletonRepo")
    with swallow_logs() as logs:
        assert_raises(GitCommandError, GitRepo.clone, src, dst)
def test_ssh_manager_close_no_throw():
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
def test_runner_log_stderr():
    # TODO: no idea how to check correct logging via any kind of assertion yet.
    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in current understanding it is indeed a url, but an 'ssh', implicit=True,
    # not just a useless scheme=weired with a hope to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
def _test_debug(msg, args=()):
    if 'empty' in args:
        def node(d):
            # just so Python marks it as a generator
            if False:
                yield d  # pragma: no cover
            else:
                return
    else:
        def node(d):
            yield updated(d, {'debugged': True})

    d1 = debug(node, *args)

    data = {'data': 1}
    with patch('pdb.set_trace') as set_trace:
        with swallow_logs(new_level=logging.INFO) as cml:
            list(d1(data))
            set_trace.assert_called_once_with()
            cml.assert_logged(msg, level='INFO')
def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True)
        # toplevel dataset was in the house already
        assert_not_in(ds, result)
        assert_in(sub2, result)
        assert_not_in(sub1, result)
        cml.assert_logged(
            msg="Target {} already exists and is not an installed dataset. "
                "Skipped.".format(sub1.path),
            regex=False, level='WARNING')
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.add("dirt_modified", to_git=True)
    with open(opj(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with a dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(opj(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    ok_(ds.repo.is_dirty(path="dirt_modified"))
    neq_(hexsha_initial, ds.repo.get_hexsha())
def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)

            subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))

            if save:
                assert_repo_status(path)
            else:
                # The datasets are created and saved ...
                assert_repo_status(path, modified=subdirs)
                # ... but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
def test_add_delete_after_and_drop(self):
    # Test that the .tar gets removed, but that new stuff was added to the
    # annex repo. We know the key since the default backend and content
    # remain the same.
    key1 = 'SHA256E-s5--16d3ad1974655987dd7801d70659990b89bfe7e931a0a358964e64e901761cc0.dat'

    # previous state of things:
    prev_files = list(find_files('.*', self.annex.path))
    with assert_raises(Exception), \
            swallow_logs():
        self.annex.whereis(key1, key=True, output='full')
    commits_prior = list(self.annex.get_branch_commits_('git-annex'))
    add_archive_content('1.tar', annex=self.annex,
                        strip_leading_dirs=True, delete_after=True)
    commits_after = list(self.annex.get_branch_commits_('git-annex'))
    # There should be a single commit for all additions, +1 to initiate
    # datalad-archives gh-1258.  We expect one more when faking dates because
    # annex.alwayscommit isn't set to false.
    assert_equal(
        len(commits_after),
        len(commits_prior) + 2 + self.annex.fake_dates_enabled)
    assert_equal(prev_files, list(find_files('.*', self.annex.path)))
    w = self.annex.whereis(key1, key=True, output='full')
    assert_equal(len(w), 2)  # in archive, and locally since we didn't drop

    # Let's now do the same but also drop content
    add_archive_content('1.tar', annex=self.annex,
                        strip_leading_dirs=True, delete_after=True,
                        drop_after=True)
    assert_equal(prev_files, list(find_files('.*', self.annex.path)))
    w = self.annex.whereis(key1, key=True, output='full')
    assert_equal(len(w), 1)  # in archive

    # there should be no .datalad temporary files hanging around
    self.assert_no_trash_left_behind()
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out a few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        if isinstance(ri, str):
            eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(str(ri), str(ri_))  # that we can reconstruct it EXACTLY on our examples

    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # This one does not have a path. TODO: either proxy path from its .RI
    # or adjust hierarchy of classes to make it more explicit
    if cls == GitTransportRI:
        return

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
def test_log_progress_noninteractive_filter():
    name = "dl-test"
    lgr = LoggerHelper(name).get_initialized_logger()
    pbar_id = "lp_test"
    with swallow_logs(new_level=logging.INFO, name=name) as cml:
        log_progress(lgr.info, pbar_id, "Start", label="testing", total=3)
        log_progress(lgr.info, pbar_id, "THERE0", update=1)
        log_progress(lgr.info, pbar_id, "NOT", update=1,
                     noninteractive_level=logging.DEBUG)
        log_progress(lgr.info, pbar_id, "THERE1", update=1,
                     noninteractive_level=logging.INFO)
        log_progress(lgr.info, pbar_id, "Done")
        for present in ["Start", "THERE0", "THERE1", "Done"]:
            assert_in(present, cml.out)
        assert_not_in("NOT", cml.out)
def test_rerun_invalid_merge_run_commit(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    ds.run("echo invalid >>invalid")
    run_msg = last_commit_msg(ds.repo)
    run_hexsha = ds.repo.get_hexsha()
    ds.repo.call_git(["reset", "--hard", DEFAULT_BRANCH + "~"])
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    # Assign two parents to the invalid run commit.
    commit = ds.repo.call_git_oneline(
        ["commit-tree", run_hexsha + "^{tree}", "-m", run_msg,
         "-p", run_hexsha + "^",
         "-p", ds.repo.get_hexsha()])
    ds.repo.call_git(["reset", "--hard", commit])
    hexsha_orig = ds.repo.get_hexsha()
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.rerun(since="")
        assert_in("has run information but is a merge commit", cml.out)
    eq_(len(ds.repo.get_revisions(hexsha_orig + ".." + DEFAULT_BRANCH)), 1)
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest(
            "Travis-specific '{}' identity file does not exist".format(ifile))

    with patch_config({"datalad.ssh.identityfile": ifile}):
        with swallow_logs(new_level=logging.DEBUG) as cml:
            manager = SSHManager()
            ssh = manager.get_connection('ssh://datalad-test')
            cmd_out, _ = ssh("echo blah")
            if _ssh_manager_is_multiplex:
                expected_socket = op.join(
                    str(manager.socket_dir),
                    get_connection_hash("datalad-test", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
            manager.close()
            assert_in("-i", cml.out)
            assert_in(ifile, cml.out)
def test_ssh_manager_close_no_throw(bogus_socket):
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

        @property
        def ctrl_path(self):
            with open(bogus_socket, "w") as f:
                f.write("whatever")
            return bogus_socket

    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
def test_runner_dry(tempfile):
    dry = DryRunProtocol()
    runner = Runner(protocol=dry)

    # test dry command call
    cmd = 'echo Testing äöü東 dry run > %s' % tempfile
    with swallow_logs(new_level=5) as cml:
        ret = runner.run(cmd)
        cml.assert_logged("{DryRunProtocol} Running: %s" % cmd, regex=False)
    assert_equal(("DRY", "DRY"), ret,
                 "Output of dry run (%s): %s" % (cmd, ret))
    assert_equal(split_cmdline(cmd), dry[0]['command'])
    assert_false(os.path.exists(tempfile))

    # test dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_is(None, output,
              "Dry call of: os.path.join, 'foo', 'bar' returned: %s" % output)
    assert_in('join', dry[1]['command'][0])
    assert_equal("args=('foo', 'bar')", dry[1]['command'][1])
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    ds.create_sibling(target_url, name=remote, shared='group')  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we have not yet used an option to record publishing to that
    # sibling by default (e.g. --set-upstream), plain ds.publish should fail
    assert_raises(InsufficientArgumentsError, ds.publish)
    # should be ok, non recursive; BUT it (git or us?) would create an empty
    # sub/ directory
    ds.publish(to=remote)
    ok_(not target_sub.is_installed())  # still not there
    with swallow_logs():  # so no warnings etc
        assert_raises(ValueError, ds.publish, recursive=True)  # since remote doesn't exist
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(clone_lev2.siblings(
            'query',
            name='origin-2',
            return_type='item-or-list')['url']),
        origin.pathobj)

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(
            msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), "
                r"'init'",
            match=False,
            level='DEBUG')
    assert_result_count(
        clone_lev3.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev3.pathobj / 'file1.txt'))
def test_addurls_version(self, path):
    ds = Dataset(path).rev_create(force=True)

    def version_fn(url):
        if url.endswith("b.dat"):
            raise ValueError("Scheme error")
        return url + ".v1"

    with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       version_urls=True)
            assert_in("b.dat", str(cml.out))

    names = ["a", "c"]
    for fname in names:
        ok_exists(os.path.join(path, fname))

    whereis = ds.repo.whereis(names, output="full")
    for fname, info in whereis.items():
        eq_(info[ds.repo.WEB_UUID]['urls'],
            ["{}udir/{}.dat.v1".format(self.url, fname)])
def test_addurls_version(self, path):
    ds = Dataset(path).create(force=True)

    def version_fn(url):
        if url.endswith("b.dat"):
            raise ValueError("Scheme error")
        return url + ".v1"

    with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       version_urls=True)
            assert_in("b.dat", str(cml.out))

    names = ["a", "c"]
    for fname in names:
        ok_exists(os.path.join(path, fname))

    whereis = ds.repo.whereis(names, output="full")
    for fname, info in whereis.items():
        eq_(info[ds.repo.WEB_UUID]['urls'],
            ["{}udir/{}.dat.v1".format(self.url, fname)])
def test_ssh_manager_close_no_throw(bogus_socket):
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

        @property
        def ctrl_path(self):
            with open(bogus_socket, "w") as f:
                f.write("whatever")
            return bogus_socket

    # since we are digging into protected area - should also set _prev_connections
    manager._prev_connections = {}
    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it is result [1] since it comes
        # after the '2' submodule
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.add(".", message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
def test_globbedpaths(path):
    for patterns, expected in [(["1.txt", "2.dat"], {"1.txt", "2.dat"}),
                               (["*.txt", "*.dat"],
                                {"1.txt", "2.dat", "3.txt"}),
                               (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {opj(path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([opj(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # We sort the glob outputs (within each pattern).
    glob_results = {"z": "z",
                    "a": ["x", "d", "b"]}
    with patch('datalad.interface.run.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat"]), (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.WARN) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)

        GlobbedPaths(["also not"], pwd=path, warn=False).expand()
        assert_not_in("No matching files found for 'also not'", cml.out)
def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True,
                        on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        assert_status('error', [result[0]])
        assert_in_results(result, status='ok', path=sub2.path)
        cml.assert_logged(
            msg="target path already exists and not empty",
            regex=False, level='ERROR')
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    manager.socket_dir,
                    get_connection_hash("localhost", identity_file=ifile))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src)
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid

    # installing it again, shouldn't matter:
    with swallow_logs(new_level=logging.INFO) as cml:
        ds = install(path, source=src)
        cml.assert_logged(msg="{0} was already installed from".format(ds),
                          regex=False, level="INFO")
        ok_(ds.is_installed())
        if isinstance(origin.repo, AnnexRepo):
            eq_(uuid_before, ds.repo.uuid)
def test_target_ssh_simple(origin, src_path, target_rootpath):
    # prepare src
    source = install(src_path, source=origin,
                     result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost:22",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target should be known to have a known UUID within the source,
    # if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial
    # checks for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))

    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows an absolute path is not url conform. But this way it's
    # easy to test that the ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log
            # and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests
                             if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad
                # installed on the remote end. ATM we don't have an easy way
                # to guarantee that AFAIK (yoh), so let's not check/enforce
                # (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        # nothing should change in terms of content
        assert_dict_equal(orig_digests, digests)
        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring
        # any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git, behavior has changed a bit and
        # the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(
        op.join(ds.path, "globbed-subds"),
        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah", strip=True)
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO: assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path, str(cm.exception))

    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)

    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        with open(opj(target_path, 'random'), 'w') as f:
            f.write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log
            # and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on
                # remote end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k for k in mtimes
            if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring
        # any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
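
# ``get_mtimes_and_digests`` is relied upon above but not defined here; a
# minimal sketch of what such a helper could do (hypothetical implementation:
# walk the target tree and collect per-file mtimes and MD5 digests; a real
# helper would also need to handle annex symlinks):
def _get_mtimes_and_digests_sketch(top):
    import hashlib
    import os
    digests, mtimes = {}, {}
    for root, _, files in os.walk(top):
        for name in files:
            full = os.path.join(root, name)
            rel = os.path.relpath(full, top)
            with open(full, 'rb') as f:
                digests[rel] = hashlib.md5(f.read()).hexdigest()
            mtimes[rel] = os.stat(full).st_mtime
    return digests, mtimes
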
def _test_version_check(host, dspath, store):
    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'
    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary
    # choice. We need just something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError, ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')
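
# For reference, the version handshake exercised above boils down to reading
# the plain-text ``ria-layout-version`` files; a simplified sketch of the
# assumed semantics (not the ORA remote's actual code):
def _layout_version_ok_sketch(version_file, supported=('1', '2')):
    # the store records its layout version as a bare string in the file
    with open(str(version_file)) as f:
        version = f.read().strip()
    # an unknown version means: keep reading, but refuse to write
    return version in supported
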
def test_plugin_call(path, dspath):
    # make plugins
    create_tree(
        path,
        {
            'dlplugin_dummy.py': dummy_plugin,
            'dlplugin_nodocs.py': nodocs_plugin,
            'dlplugin_broken.py': broken_plugin,
        })
    fake_dummy_spec = {
        'dummy': {'file': opj(path, 'dlplugin_dummy.py')},
        'nodocs': {'file': opj(path, 'dlplugin_nodocs.py')},
        'broken': {'file': opj(path, 'dlplugin_broken.py')},
    }

    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        with swallow_outputs() as cmo:
            plugin(showplugininfo=True)
            # hyphen spacing depends on the longest plugin name!
            # sorted
            # summary list generation doesn't actually load plugins for speed,
            # hence broken is not known to be broken here
            eq_(cmo.out,
                "broken [no synopsis] ({})\ndummy - real dummy ({})\nnodocs [no synopsis] ({})\n".format(
                    fake_dummy_spec['broken']['file'],
                    fake_dummy_spec['dummy']['file'],
                    fake_dummy_spec['nodocs']['file']))
        with swallow_outputs() as cmo:
            plugin(['dummy'], showpluginhelp=True)
            eq_(cmo.out.rstrip(),
                "Usage: dummy(dataset, noval, withval='test')\n\nmydocstring")
        with swallow_outputs() as cmo:
            plugin(['nodocs'], showpluginhelp=True)
            eq_(cmo.out.rstrip(),
                "Usage: nodocs()\n\nThis plugin has no documentation")
        # loading fails, no docs
        assert_raises(ValueError, plugin, ['broken'], showpluginhelp=True)

    # assume this most obscure plugin name is not used
    assert_raises(ValueError, plugin, '32sdfhvz984--^^')

    # broken plugin argument, must match Python keyword arg
    # specs
    assert_raises(ValueError, plugin, ['dummy', '1245'])

    def fake_is_installed(*args, **kwargs):
        return True

    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec), \
            patch('datalad.distribution.dataset.Dataset.is_installed',
                  return_value=True):
        # does not trip over unsupported argument, they get filtered out,
        # because we carry all kinds of stuff
        with swallow_logs(new_level=logging.WARNING) as cml:
            res = list(plugin(['dummy', 'noval=one', 'obscure=some']))
            assert_status('ok', res)
            cml.assert_logged(
                msg=".*Ignoring plugin argument\\(s\\).*obscure.*, not supported by plugin.*",
                regex=True, level='WARNING')

        # fails on missing positional arg
        assert_raises(TypeError, plugin, ['dummy'])
        # positional and kwargs actually make it into the plugin
        res = list(plugin(['dummy', 'noval=one', 'withval=two']))[0]
        eq_('one', res['args']['noval'])
        eq_('two', res['args']['withval'])
        # kwarg defaults are preserved
        res = list(plugin(['dummy', 'noval=one']))[0]
        eq_('test', res['args']['withval'])
        # repeated specification yields list input
        res = list(plugin(['dummy', 'noval=one', 'noval=two']))[0]
        eq_(['one', 'two'], res['args']['noval'])
        # can do the same thing while bypassing argument parsing for calls
        # from within python, and even preserve native python dtypes
        res = list(plugin(['dummy', ('noval', 1), ('noval', 'two')]))[0]
        eq_([1, 'two'], res['args']['noval'])
        # and we can further simplify in this case by passing lists right
        # away
        res = list(plugin(['dummy', ('noval', [1, 'two'])]))[0]
        eq_([1, 'two'], res['args']['noval'])

    # dataset arg handling
    # run plugin that needs a dataset where there is none
    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        ds = None
        with chpwd(dspath):
            assert_raises(ValueError, plugin, ['dummy', 'noval=one'])
            # create a dataset here, fixes the error
            ds = create()
            res = list(plugin(['dummy', 'noval=one']))[0]
            # gives dataset instance
            eq_(ds, res['args']['dataset'])
        # now do it again, giving the dataset path
        # but careful, `dataset` is a proper argument
        res = list(plugin(['dummy', 'noval=one'], dataset=dspath))[0]
        eq_(ds, res['args']['dataset'])
        # however, if passed alongside the plugins args it also works
        res = list(plugin(['dummy', 'dataset={}'.format(dspath), 'noval=one']))[0]
        eq_(ds, res['args']['dataset'])
        # but if both are given, the proper argument takes precedence
        assert_raises(ValueError, plugin,
                      ['dummy', 'dataset={}'.format(dspath), 'noval=one'],
                      dataset='rubbish')
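
# ``dummy_plugin`` above is a string holding plugin source code. Judging from
# the assertions (synopsis "real dummy", usage
# "dummy(dataset, noval, withval='test')", results carrying an 'args' key), a
# plausible sketch of that source could be (hypothetical content):
_dummy_plugin_sketch = '''\
def dlplugin(dataset, noval, withval='test'):
    """real dummy

    mydocstring
    """
    yield dict(
        status='ok',
        action='dummy',
        args=dict(dataset=dataset, noval=noval, withval=withval),
    )
'''
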
def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))
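
# ``self.json_file`` is assumed to hold addurls records; rows whose "url"
# field is empty are what triggers the warning above. Illustrative shape only
# (hypothetical data):
_addurls_rows_sketch = [
    {"url": "http://example.com/a.dat", "name": "a", "subdir": "foo"},
    {"url": "", "name": "b", "subdir": "bar"},  # dropped: empty URL
]
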
def test_url_samples():
    _check_ri("http://example.com", URL, scheme='http', hostname="example.com")
    # "complete" one for classical http
    _check_ri("http://*****:*****@example.com:8080/p/sp?p1=v1&p2=v2#frag", URL,
              scheme='http', hostname="example.com", port=8080,
              username='******', password='******',
              path='/p/sp', query='p1=v1&p2=v2', fragment='frag')

    # sample one for ssh with specifying the scheme
    # XXX? might be useful? https://github.com/FriendCode/giturlparse.py
    _check_ri("ssh://host/path/sp1", URL,
              scheme='ssh', hostname='host', path='/path/sp1')
    _check_ri("user@host:path/sp1", SSHRI,
              hostname='host', path='path/sp1', username='******')
    _check_ri("host:path/sp1", SSHRI, hostname='host', path='path/sp1')
    _check_ri("host:path", SSHRI, hostname='host', path='path')
    _check_ri("host:/path", SSHRI, hostname='host', path='/path')
    _check_ri("user@host", SSHRI, hostname='host', username='******')
    # TODO!!! should this be a legit URL like this?
    # _check_ri("host", SSHRI, hostname='host'))
    eq_(repr(RI("host:path")), "SSHRI(hostname='host', path='path')")

    # And now perspective 'datalad', implicit=True urls pointing to the
    # canonical center location
    _check_ri("///", DataLadRI)
    _check_ri("///p/s1", DataLadRI, path='p/s1')
    # could be considered by someone as "URI reference" relative to scheme
    _check_ri("//a/", DataLadRI, remote='a')
    _check_ri("//a/data", DataLadRI, path='data', remote='a')

    # here we will do custom magic allowing only schemes with + in them, such
    # as dl+archive
    # or not so custom as
    _check_ri("hg+https://host/user/proj", URL,
              scheme="hg+https", hostname='host', path='/user/proj')
    # "old" style
    _check_ri("dl+archive:KEY/path/sp1#size=123", URL,
              scheme='dl+archive', path='KEY/path/sp1', fragment='size=123')
    # "new" style
    _check_ri("dl+archive:KEY#path=path/sp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path/sp1&size=123')
    # actually above one is probably wrong since we need to encode the path
    _check_ri("dl+archive:KEY#path=path%2Fbsp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path%2Fbsp1&size=123')

    # https://en.wikipedia.org/wiki/File_URI_scheme
    _check_ri("file://host", URL, scheme='file', hostname='host')
    _check_ri("file://host/path/sp1", URL,
              scheme='file', hostname='host', path='/path/sp1')
    # stock libraries of Python aren't quite ready for ipv6
    ipv6address = '2001:db8:85a3::8a2e:370:7334'
    _check_ri("file://%s/path/sp1" % ipv6address, URL,
              scheme='file', hostname=ipv6address, path='/path/sp1')
    for lh in ('localhost', '::1', '', '127.3.4.155'):
        _check_ri("file://%s/path/sp1" % lh, URL,
                  localpath='/path/sp1', scheme='file', hostname=lh,
                  path='/path/sp1')
    _check_ri('http://[1fff:0:a88:85a3::ac1f]:8001/index.html', URL,
              scheme='http', hostname='1fff:0:a88:85a3::ac1f', port=8001,
              path='/index.html')
    _check_ri("file:///path/sp1", URL,
              localpath='/path/sp1', scheme='file', path='/path/sp1')
    # we don't do any magical comprehension for home paths/drives for windows
    # of file:// urls, thus leaving /~ and /c: for now:
    _check_ri("file:///~/path/sp1", URL,
              localpath='/~/path/sp1', scheme='file', path='/~/path/sp1')
    _check_ri("file:///%7E/path/sp1", URL,
              localpath='/~/path/sp1', scheme='file', path='/~/path/sp1',
              exact_str=False)
    # not sure but let's check
    _check_ri("file:///c:/path/sp1", URL,
              localpath='/c:/path/sp1', scheme='file', path='/c:/path/sp1',
              exact_str=False)

    # and now implicit paths or actually they are also "URI references"
    _check_ri("f", PathRI, localpath='f', path='f')
    _check_ri("f/s1", PathRI, localpath='f/s1', path='f/s1')
    _check_ri("/f", PathRI, localpath='/f', path='/f')
    _check_ri("/f/s1", PathRI, localpath='/f/s1', path='/f/s1')

    # some github ones, just to make sure
    _check_ri("git://host/user/proj", URL,
              scheme="git", hostname="host", path="/user/proj")
    _check_ri("git@host:user/proj", SSHRI,
              hostname="host", path="user/proj", username='******')

    _check_ri('weired:/', SSHRI, hostname='weired', path='/')
    # since the scheme does not allow some symbols, an additional check is needed
    _check_ri('weired_url:/', SSHRI, hostname='weired_url', path='/')
    _check_ri('example.com:/', SSHRI, hostname='example.com', path='/')
    _check_ri('example.com:path/sp1', SSHRI,
              hostname='example.com', path='path/sp1')
    _check_ri('example.com/path/sp1\:fname', PathRI,
              localpath='example.com/path/sp1\:fname',
              path='example.com/path/sp1\:fname')
    # ssh is as stupid as us, so we will stay "Consistently" dumb
    """
    $> ssh example.com/path/sp1:fname
    ssh: Could not resolve hostname example.com/path/sp1:fname: Name or service not known
    """
    _check_ri('example.com/path/sp1:fname', SSHRI,
              hostname='example.com/path/sp1', path='fname')

    # SSHRIs have .port, but it is empty
    eq_(SSHRI(hostname='example.com').port, '')

    # check that we are getting a warning logged when url can't be
    # reconstructed precisely
    # actually failed to come up with one -- becomes late here
    #_check_ri("http://host///..//p", scheme='http', path='/..//p')

    # actually this one is good enough to trigger a warning and I still don't
    # know what it should exactly be!?
    with swallow_logs(new_level=logging.DEBUG) as cml:
        weired_str = 'weired://'
        weired_url = RI(weired_str)
        repr(weired_url)
        cml.assert_logged(
            'Parsed version of SSHRI .weired:/. '
            'differs from original .weired://.'
        )
        # but we store original str
        eq_(str(weired_url), weired_str)
        neq_(weired_url.as_str(), weired_str)

    raise SkipTest("TODO: file://::1/some does complain about parsed version dropping ::1")
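
# ``_check_ri`` is the workhorse of the URL tests; a minimal sketch of its
# assumed behavior (parse the string, verify the RI subclass and fields, and
# check the round-trip unless exact_str=False):
def _check_ri_sketch(ri_str, cls, exact_str=True, **fields):
    ri = RI(ri_str)  # RI dispatches to URL / SSHRI / PathRI / DataLadRI
    assert isinstance(ri, cls)
    for field, value in fields.items():
        eq_(getattr(ri, field), value)
    if exact_str:
        eq_(str(ri), ri_str)  # the original string is preserved
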
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3,
        len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                 == ensure_unicode(name)
                 for s in origres if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None,
                                      return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing")

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test for publishing with --since. By default, since there are no
    # changes, only the current dataset is pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")

    # TODO: Doesn't work: https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", auto_add_changes=True)
    source.repo.commit("", options=['-a'])

    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})
def test_globbedpaths(path):
    dotdir = op.curdir + op.sep

    for patterns, expected in [
            (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
            ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
            (["*.txt", "*.dat"],
             {"1.txt", "2.dat", u"bβ.dat", "3.txt"}),
            ([dotdir + "*.txt", "*.dat"],
             {dotdir + "1.txt", "2.dat", u"bβ.dat", dotdir + "3.txt"}),
            (["subdir/*.txt"],
             {"subdir/1.txt", "subdir/2.txt"}),
            ([dotdir + "subdir/*.txt"],
             {dotdir + p for p in ["subdir/1.txt", "subdir/2.txt"]}),
            (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
            (["*.txt"], {"1.txt", "2.txt"}),
            ([dotdir + "*.txt"], {dotdir + p for p in ["1.txt", "2.txt"]}),
            ([pardir + "*.txt"], {pardir + p for p in ["1.txt", "3.txt"]}),
            ([dotdir + pardir + "*.txt"],
             {dotdir + pardir + p for p in ["1.txt", "3.txt"]}),
            (["subdir/"], {"subdir/"})]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", u"bβ.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", u"bβ.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat", u"bβ.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # Glob outputs are sorted within each pattern.
    glob_results = {"z": "z",
                    "a": ["x", "d", "b"]}
    with patch('glob.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # Glob expansion for the paths property is determined by the expand
    # argument.
    for expand, expected in [(True, ["2.dat", u"bβ.dat"]),
                             (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
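
# The fixture tree assumed by test_globbedpaths, reconstructed from the
# expected matches (illustrative sketch only; file contents do not matter):
def _setup_globbedpaths_tree_sketch(path):
    create_tree(path, {"1.txt": "", "2.dat": "", u"bβ.dat": "", "3.txt": "",
                       "subdir": {"1.txt": "", "2.txt": ""}})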