def test_aggregate_with_missing_or_duplicate_id(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    assert_false(exists(opj(subds.path, '.datalad', 'config')))
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 metadata sets, one implicit, and one native per dataset
    # and a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    # and we know nothing subsub
    for name in ('grandchild_äöü東',):
        assert_true(sum([s.get('name', '') == assure_unicode(name) for s in meta]))
    # but search should not fail
    with swallow_outputs():
        res1 = list(search_('.', regex=True, dataset=ds))
    assert res1

    # and let's now check that we don't fail on a duplicate dataset, i.e. when
    # the same dataset is installed twice
    subds_clone = ds.install(source=subds.path, path="subds2")
    with swallow_outputs():
        res2 = list(search_('.', regex=True, dataset=ds))
def test_basics(path, nodspath): ds = Dataset(path).create() direct_mode = ds.repo.is_direct_mode() last_state = ds.repo.get_hexsha() # run inside the dataset with chpwd(path), \ swallow_outputs(): # provoke command failure with assert_raises(CommandError) as cme: ds.run('7i3amhmuch9invalid') # let's not speculate that the exit code is always 127 ok_(cme.code > 0) eq_(last_state, ds.repo.get_hexsha()) # now one that must work res = ds.run('touch empty', message='TEST') ok_clean_git(ds.path) assert_result_count(res, 2) # TODO 'state' is still untracked!!! assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty'), type='file') assert_result_count(res, 1, action='save', path=ds.path) commit_msg = ds.repo.format_commit("%B") ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST')) # crude test that we have a record for the PWD assert_in('"pwd": "."', commit_msg) last_state = ds.repo.get_hexsha() # now run a command that will not alter the dataset res = ds.run('touch empty', message='NOOP_TEST') # When in direct mode, check at the level of save rather than add # because the annex files show up as typechanges and adding them won't # necessarily have a "notneeded" status. assert_result_count(res, 1, action='save' if direct_mode else 'add', status='notneeded') eq_(last_state, ds.repo.get_hexsha()) # We can also run the command via a single-item list because this is # what the CLI interface passes in for quoted commands. res = ds.run(['touch empty'], message='NOOP_TEST') assert_result_count(res, 1, action='save' if direct_mode else 'add', status='notneeded') # run outside the dataset, should still work but with limitations with chpwd(nodspath), \ swallow_outputs(): res = ds.run(['touch', 'empty2'], message='TEST') assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty2'), type='file', status='ok') assert_result_count(res, 1, action='save', status='ok') # running without a command is a noop with chpwd(path): with swallow_logs(new_level=logging.WARN) as cml: ds.run() assert_in("No command given", cml.out)
def test_rerun_just_one_commit(path):
    ds = Dataset(path).create()

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.repo.repo.git.reset("--hard")
    ds.repo.config.reload()

    ds.run('echo static-content > static')
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)

    # Rerunning with just one commit doesn't raise an error ...
    ds.rerun()
    # ... but we're still at one commit because the content didn't
    # change.
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)

    # We abort rather than trying to do anything when --onto='' and
    # --since='' are given together and the first commit contains a
    # run command.
    ds.repo.commit(msg="empty", options=["--allow-empty"])
    assert_raises(IncompleteResultsError, ds.rerun, since="", onto="")

    # --script propagates the error.
    with swallow_outputs():
        assert_raises(IncompleteResultsError,
                      ds.rerun, since="", onto="", script="-")
    # --dry-run propagates the error.
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="", onto="",
                  report=True, return_type="list")
def test_runner_log_stdout():
    # TODO: no idea of how to check correct logging via any kind of assertion yet.
    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if ran outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(runner.commands, [],
                     "Run of: %s resulted in non-empty buffer: %s"
                     % (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')
    ok_(subds.is_installed())

    # Verify that the correct submodule was installed and not a
    # new repository initiated
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious case: install an annex file within a not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))

    # we shouldn't be able to silently ignore an attempt to provide a source
    # while "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'),
                  source="http://bogusbogus")
def test_py2_unicode_command(path):
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable, touch_cmd, u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    if not on_windows:  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([s.encode("utf-8")
                for s in [sys.executable, "-c", touch_cmd, u" β1 "]])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    with assert_raises(CommandError), swallow_outputs():
        ds.run(u"bβ2.dat")
def test_run_cmdline_disambiguation(path):
    Dataset(path).create()
    with chpwd(path):
        # Without a positional argument starting a command, any option is
        # treated as an option to 'datalad run'.
        with swallow_outputs() as cmo:
            with patch("datalad.core.local.run._execute_command") as exec_cmd:
                with assert_raises(SystemExit):
                    main(["datalad", "run", "--message"])
                exec_cmd.assert_not_called()
            assert_in("message: expected one", cmo.err)

        # If we want to pass an option as the first value of a command (e.g.,
        # because we are using a runscript with containers-run), we can do this
        # with "--".
        with patch("datalad.core.local.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "--message"])
            exec_cmd.assert_called_once_with(
                "--message", path, expected_exit=None)

        # And a twist on above: Our parser mishandles --version (gh-3067),
        # treating 'datalad run CMD --version' as 'datalad --version'.
        version_stream = "err" if PY2 else "out"
        with swallow_outputs() as cmo:
            with assert_raises(SystemExit) as cm:
                main(["datalad", "run", "echo", "--version"])
            eq_(cm.exception.code, 0)
            out = getattr(cmo, version_stream)
        with swallow_outputs() as cmo:
            with assert_raises(SystemExit):
                main(["datalad", "--version"])
            version_out = getattr(cmo, version_stream)
        ok_(version_out)
        eq_(version_out, out)

        # We can work around that (i.e., make "--version" get passed as
        # command) with "--".
        with patch("datalad.core.local.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "echo", "--version"])
            exec_cmd.assert_called_once_with(
                "echo --version", path, expected_exit=None)
def test_exit_code():
    # will relay actual exit code on CommandError
    cmd = ['datalad', 'sshrun', 'localhost', 'exit 42']
    with assert_raises(SystemExit) as cme:
        # running nosetests without -s
        if isinstance(sys.stdout, StringIO):  # pragma: no cover
            with swallow_outputs():  # need to give smth with .fileno ;)
                main(cmd)
        else:
            # to test both scenarios
            main(cmd)
    assert_equal(cme.exception.code, 42)
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_runner_log_stderr():
    # TODO: no idea of how to check correct logging via any kind of assertion yet.
    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(runner.commands, [],
                 "Run of: %s resulted in non-empty buffer: %s"
                 % (cmd, runner.commands.__str__()))
def test_wtf(path): # smoke test for now with swallow_outputs() as cmo: wtf(dataset=path) assert_not_in('## dataset', cmo.out) assert_in('## configuration', cmo.out) # Those sections get sensored out by default now assert_not_in('user.name: ', cmo.out) with chpwd(path): with swallow_outputs() as cmo: wtf() assert_not_in('## dataset', cmo.out) assert_in('## configuration', cmo.out) # now with a dataset ds = create(path) with swallow_outputs() as cmo: wtf(dataset=ds.path) assert_in('## configuration', cmo.out) assert_in('## dataset', cmo.out) assert_in('path: {}'.format(ds.path), cmo.out) # and if we run with all sensitive for sensitive in ('some', True): with swallow_outputs() as cmo: wtf(dataset=ds.path, sensitive=sensitive) # we fake those for tests anyways, but we do show cfg in this mode # and explicitly not showing them assert_in('user.name: %s' % _HIDDEN, cmo.out) with swallow_outputs() as cmo: wtf(dataset=ds.path, sensitive='all') assert_not_in(_HIDDEN, cmo.out) # all is shown assert_in('user.name: ', cmo.out) skip_if_no_module('pyperclip') # verify that it works correctly in the env/platform import pyperclip with swallow_outputs() as cmo: try: pyperclip.copy("xxx") pyperclip_works = pyperclip.paste().strip() == "xxx" wtf(dataset=ds.path, clipboard=True) except (AttributeError, pyperclip.PyperclipException) as exc: # AttributeError could come from pyperclip if no DISPLAY raise SkipTest(exc_str(exc)) assert_in("WTF information of length", cmo.out) assert_not_in('user.name', cmo.out) if not pyperclip_works: # Some times does not throw but just fails to work raise SkipTest( "Pyperclip seems to be not functioning here correctly") assert_not_in('user.name', pyperclip.paste()) assert_in(_HIDDEN, pyperclip.paste()) # by default no sensitive info assert_in("cmd:annex:", pyperclip.paste()) # but the content is there
def test_add_archive_dirs(path_orig, url, repo_path):
    # change to repo_path
    with chpwd(repo_path):
        # create annex repo
        repo = AnnexRepo(repo_path, create=True)

        # add archive to the repo so we could test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')], options=["--pathdepth", "-1"])
        repo.commit("added 1.tar.gz")

        # test with excludes and annex options
        add_archive_content(
            '1.tar.gz',
            existing='archive-suffix',
            # Since inconsistent and seems in many cases no leading dirs to
            # strip, keep them as provided
            strip_leading_dirs=True,
            delete=True,
            leading_dirs_consider=['crcns.*', '1'],
            leading_dirs_depth=2,
            use_current_dir=False,
            exclude='.*__MACOSX.*')  # some junk penetrates

        eq_(
            repo.get_description(
                uuid=DATALAD_SPECIAL_REMOTES_UUIDS[ARCHIVES_SPECIAL_REMOTE]),
            '[%s]' % ARCHIVES_SPECIAL_REMOTE)

        all_files = sorted(find_files('.'))
        target_files = {
            './CR24A/behaving1/1 f.txt',
            './CR24C/behaving3/3 f.txt',
            './CR24D/behaving2/2 f.txt',
        }
        eq_(set(all_files), target_files)

        # regression test: the subdir in MACOSX wasn't excluded and its name
        # was getting stripped by leading_dir_len
        # if stripping and exclude didn't work this fails
        assert_false(exists('__MACOSX'))
        # if exclude doesn't work then name of subdir gets stripped by
        # leading_dir_len
        assert_false(exists('c-1_data'))
        # if exclude doesn't work but everything else works this fails
        assert_false(exists('CR24B'))
def test_rerun_chain(path):
    ds = Dataset(path).create()
    commits = []

    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.repo.tag("first-run")

    for _ in range(3):
        commits.append(ds.repo.get_hexsha())
        ds.rerun()
        _, info = get_run_info(ds, ds.repo.format_commit("%B"))
        assert info["chain"] == commits

    ds.rerun(revision="first-run")
    _, info = get_run_info(ds, ds.repo.format_commit("%B"))
    assert info["chain"] == commits[:1]
def test_rerun_chain(path):
    ds = Dataset(path).create()
    commits = []

    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.repo.tag("first-run", commit=DEFAULT_BRANCH)

    for _ in range(3):
        commits.append(ds.repo.get_hexsha(DEFAULT_BRANCH))
        ds.rerun()
        _, info = get_run_info(ds, last_commit_msg(ds.repo))
        eq_(info["chain"], commits)

    ds.rerun(revision="first-run")
    _, info = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(info["chain"], commits[:1])
def check_runner_heavy_output(log_online): # TODO: again, no automatic detection of this resulting in being # stucked yet. runner = Runner() cmd = '%s %s' % (sys.executable, op.join(op.dirname(__file__), "heavyoutput.py")) with swallow_outputs() as cm, swallow_logs(): ret = runner.run(cmd, log_online=log_online, log_stderr=False, log_stdout=False, expect_stderr=True) eq_(cm.err, cm.out) # they are identical in that script eq_(cm.out[:10], "0 [0, 1, 2") eq_(cm.out[-15:], "997, 998, 999]\n") # for some reason swallow_logs is not effective, so we just skip altogether # if too heavy debug output if lgr.getEffectiveLevel() <= logging.DEBUG: raise SkipTest("Skipping due to too heavy impact on logs complicating debugging") #do it again with capturing: with swallow_logs(): ret = runner.run(cmd, log_online=log_online, log_stderr=True, log_stdout=True, expect_stderr=True) if log_online: # halting case of datalad add and other batch commands #2116 logged = [] with swallow_logs(): def process_stdout(l): assert l logged.append(l) ret = runner.run( cmd, log_online=log_online, log_stdout=process_stdout, log_stderr='offline', expect_stderr=True ) assert_equal(len(logged), 100) assert_greater(len(ret[1]), 1000) # stderr all here assert not ret[0], "all messages went into `logged`"
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection of this resulting in being stuck yet.
    runner = Runner()
    cmd = '%s -c "import datalad.tests.heavyoutput;"' % sys.executable
    with swallow_outputs() as cm:
        ret = runner.run(cmd, log_stderr=False, log_stdout=False, expect_stderr=True)
        eq_(cm.err, cm.out)  # they are identical in that script
        eq_(cm.out[:10], "[0, 1, 2, ")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    # do it again with capturing:
    ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)

    # and now original problematic command with a massive single line
    if not log_online:
        # We know it would get stuck in online mode
        cmd = '%s -c "import sys; x=str(list(range(1000))); [(sys.stdout.write(x), sys.stderr.write(x)) for i in xrange(100)];"' % sys.executable
        ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)
def test_status_custom_summary_no_repeats(path):
    from datalad.api import Dataset
    from datalad.core.local.status import Status

    # This regression test depends on the command having a custom summary
    # renderer *and* the particular call producing summary output. status()
    # having this method doesn't guarantee that it is still an appropriate
    # command for this test, but it's at least a necessary condition.
    ok_(hasattr(Status, "custom_result_summary_renderer"))

    ds = Dataset(path).create()
    out = WitlessRunner(cwd=path).run(
        ["datalad", "--output-format=tailored", "status"],
        protocol=StdOutCapture)
    out_lines = out['stdout'].splitlines()
    ok_(out_lines)
    eq_(len(out_lines), len(set(out_lines)))

    with swallow_outputs() as cmo:
        ds.status(return_type="list", result_renderer="tailored")
        eq_(out_lines, cmo.out.splitlines())
def test_py2_unicode_command(path):
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable, touch_cmd, u"bβ0.dat")
    ds.run(cmd_str)
    ok_clean_git(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
    ok_clean_git(ds.path)
    ok_exists(op.join(path, u"bβ1.dat"))

    # Send in a list of byte-strings to mimic a py2 command-line invocation.
    ds.run([s.encode("utf-8")
            for s in [sys.executable, "-c", touch_cmd, u" β1 "]])
    ok_clean_git(ds.path)
    ok_exists(op.join(path, u" β1 "))

    with assert_raises(CommandError), swallow_outputs():
        ds.run(u"bβ2.dat")
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with swallow_outputs():
        with assert_raises(CommandError):
            ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
def check_decompress_file(leading_directories, path):
    outdir = op.join(path, 'simple-extracted')

    with swallow_outputs() as cmo:
        decompress_file(op.join(path, fn_archive_obscure_ext), outdir,
                        leading_directories=leading_directories)
        eq_(cmo.out, "")
        eq_(cmo.err, "")

    path_archive_obscure = op.join(outdir, fn_archive_obscure)
    if leading_directories == 'strip':
        assert_false(op.exists(path_archive_obscure))
        testpath = outdir
    elif leading_directories is None:
        assert_true(op.exists(path_archive_obscure))
        testpath = path_archive_obscure
    else:
        raise NotImplementedError("Dunno about this strategy: %s"
                                  % leading_directories)

    assert_true(op.exists(op.join(testpath, '3.txt')))
    assert_true(op.exists(op.join(testpath, fn_in_archive_obscure)))
    with open(op.join(testpath, '3.txt')) as f:
        eq_(f.read(), '3 load')
def test_rerun_just_one_commit(path):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status('impossible',
                      ds.rerun(branch="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    ds.repo.checkout("orph", options=["--orphan"])
    ds.repo.call_git(["reset", "--hard"])
    ds.repo.config.reload()

    ds.run('echo static-content > static')
    eq_(len(ds.repo.get_revisions("HEAD")), 1)
    assert_raises(IncompleteResultsError, ds.rerun)
    assert_raises(IncompleteResultsError, ds.rerun, since="", onto="")

    # --script propagates the error.
    with swallow_outputs():
        assert_raises(IncompleteResultsError,
                      ds.rerun, since="", onto="", script="-")
    # --dry-run propagates the error.
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="", onto="",
                  report=True, return_type="list")
def test_wtf(path): # smoke test for now with swallow_outputs() as cmo: wtf(dataset=path) assert_not_in('## dataset', cmo.out) assert_in('## configuration', cmo.out) # Those sections get sensored out by default now assert_not_in('user.name: ', cmo.out) with chpwd(path): with swallow_outputs() as cmo: wtf() assert_not_in('## dataset', cmo.out) assert_in('## configuration', cmo.out) # now with a dataset ds = create(path) with swallow_outputs() as cmo: wtf(dataset=ds.path) assert_in('## configuration', cmo.out) assert_in('## dataset', cmo.out) assert_in('path: {}'.format(ds.path), cmo.out) # and if we run with all sensitive for sensitive in ('some', True): with swallow_outputs() as cmo: wtf(dataset=ds.path, sensitive=sensitive) # we fake those for tests anyways, but we do show cfg in this mode # and explicitly not showing them assert_in('user.name: %s' % _HIDDEN, cmo.out) with swallow_outputs() as cmo: wtf(dataset=ds.path, sensitive='all') assert_not_in(_HIDDEN, cmo.out) # all is shown assert_in('user.name: ', cmo.out) # Sections selection # # If we ask for no sections and there is no dataset with chpwd(path): with swallow_outputs() as cmo: wtf(sections=[]) assert_not_in('## dataset', cmo.out) for s in SECTION_CALLABLES: assert_not_in('## %s' % s.lower(), cmo.out.lower()) # ask for a selected set secs = ['git-annex', 'configuration'] with chpwd(path): with swallow_outputs() as cmo: wtf(sections=secs) for s in SECTION_CALLABLES: (assert_in if s in secs else assert_not_in)( '## %s' % s.lower(), cmo.out.lower() ) # order should match our desired one, not alphabetical assert cmo.out.index('## git-annex') < cmo.out.index('## configuration') # not achievable from cmdline is to pass an empty list of sections. with chpwd(path): with swallow_outputs() as cmo: wtf(sections=[]) eq_(cmo.out.rstrip(), '# WTF') # and we could decorate it nicely for embedding e.g. into github issues with swallow_outputs() as cmo: wtf(sections=['dependencies'], decor='html_details') ok_startswith(cmo.out, '<details><summary>DataLad %s WTF' % __version__) assert_in('## dependencies', cmo.out) # should result only in '# WTF' skip_if_no_module('pyperclip') # verify that it works correctly in the env/platform import pyperclip with swallow_outputs() as cmo: try: pyperclip.copy("xxx") pyperclip_works = pyperclip.paste().strip() == "xxx" wtf(dataset=ds.path, clipboard=True) except (AttributeError, pyperclip.PyperclipException) as exc: # AttributeError could come from pyperclip if no DISPLAY raise SkipTest(exc_str(exc)) assert_in("WTF information of length", cmo.out) assert_not_in('user.name', cmo.out) if not pyperclip_works: # Some times does not throw but just fails to work raise SkipTest( "Pyperclip seems to be not functioning here correctly") assert_not_in('user.name', pyperclip.paste()) assert_in(_HIDDEN, pyperclip.paste()) # by default no sensitive info assert_in("cmd:annex:", pyperclip.paste()) # but the content is there
def test_aggregation(path): with chpwd(path): assert_raises(InsufficientArgumentsError, aggregate_metadata, None) # a hierarchy of three (super/sub)datasets, each with some native metadata ds = Dataset(opj(path, 'origin')).create(force=True) subds = ds.create('sub', force=True, if_dirty='ignore') subsubds = subds.create('subsub', force=True, if_dirty='ignore') # aggregate from bottom to top, guess native data, no compacting of graph # should yield 6 meta data sets, one implicit, and one native per dataset # and a second natiev set for the topmost dataset aggregate_metadata(ds, guess_native_type=True, recursive=True) # no only ask the top superdataset, no recursion, just reading from the cache meta = get_metadata(ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False) assert_equal(len(meta), 10) # same schema assert_equal( 10, sum([ s.get('@context', {'@vocab': None})['@vocab'] == 'http://schema.org/' for s in meta ])) # three different IDs assert_equal(3, len(set([s.get('@id') for s in meta]))) # and we know about all three datasets for name in ('mother_äöü東', 'child_äöü東', 'grandchild_äöü東'): assert_true( sum([s.get('name', None) == assure_unicode(name) for s in meta])) #print(meta) assert_equal( # first implicit, then two natives, then aggregate meta[3]['dcterms:hasPart']['@id'], subds.id) success = False for m in meta: p = m.get('dcterms:hasPart', {}) if p.get('@id', None) == subsubds.id: assert_equal(opj('sub', 'subsub'), p.get('location', None)) success = True assert_true(success) # save the toplevel dataset only (see below) ds.save('with aggregated meta data', auto_add_changes=True) # now clone the beast to simulate a new user installing an empty dataset clone = install(opj(path, 'clone'), source=ds.path) # ID mechanism works assert_equal(ds.id, clone.id) # get fresh meta data, the implicit one for the top-most datasets should # differ, but the rest not clonemeta = get_metadata(clone, guess_type=False, ignore_subdatasets=False, ignore_cache=False) # make sure the implicit md for the topmost come first assert_equal(clonemeta[0]['@id'], clone.id) assert_equal(clonemeta[0]['@id'], ds.id) assert_equal(clone.repo.get_hexsha(), ds.repo.get_hexsha()) assert_equal(clonemeta[0]['version'], ds.repo.get_hexsha()) # all but the implicit is identical assert_equal(clonemeta[1:], meta[1:]) # the implicit md of the clone should list a dataset ID for its subds, # although it has not been obtained! 
assert_equal(clonemeta[3]['dcterms:hasPart']['@id'], subds.id) # now obtain a subdataset in the clone and the IDs should be updated clone.install('sub') partial = get_metadata(clone, guess_type=False, ignore_cache=True) # ids don't change assert_equal(partial[0]['@id'], clonemeta[0]['@id']) # datasets are properly connected assert_equal(partial[1]['dcterms:hasPart']['@id'], partial[2]['@id']) # query smoke test if os.environ.get('DATALAD_TESTS_NONETWORK'): raise SkipTest assert_equal(len(list(clone.search('mother'))), 1) assert_equal(len(list(clone.search('MoTHER'))), 1) # case insensitive child_res = list(clone.search('child')) assert_equal(len(child_res), 2) # little helper to match names def assert_names(res, names, path=clone.path): assert_equal(list(map(itemgetter(0), res)), [opj(path, n) for n in names]) # should yield (location, report) tuples assert_names(child_res, ['sub', 'sub/subsub']) # result should be identical to invoking search from api # and search_ should spit out locations out with swallow_outputs() as cmo: res = list(search_('child', dataset=clone)) assert_equal(res, child_res) assert_in(res[0][0], cmo.out) # and overarching search_ just for smoke testing of processing outputs # and not puking (e.g. under PY3) with swallow_outputs() as cmo: assert list(search_('.', regex=True, dataset=clone)) assert cmo.out # test searching among specified properties only assert_names(clone.search('i', search='name'), ['sub', 'sub/subsub']) assert_names(clone.search('i', search='keywords'), ['.']) # case shouldn't matter assert_names(clone.search('i', search='Keywords'), ['.']) assert_names(clone.search('i', search=['name', 'keywords']), ['.', 'sub', 'sub/subsub']) # without report_matched, we are getting none of the fields assert (all([not x for x in map(itemgetter(1), child_res)])) # but we would get all if asking for '*' assert (all([ len(x) >= 9 for x in map(itemgetter(1), list(clone.search('child', report='*'))) ])) # but we would get only the matching name if we ask for report_matched assert_equal( set( map(lambda x: tuple(x[1].keys()), clone.search('child', report_matched=True))), set([('name', )])) # and the additional field we might have asked with report assert_equal( set( map( lambda x: tuple(sorted(x[1].keys())), clone.search('child', report_matched=True, report=['schema:type']))), set([('name', 'schema:type')])) # and if we ask report to be 'empty', we should get no fields child_res_empty = list(clone.search('child', report='')) assert_equal(len(child_res_empty), 2) assert_equal(set(map(lambda x: tuple(x[1].keys()), child_res_empty)), set([tuple()])) # more tests on returned paths: assert_names(clone.search('datalad'), ['.', 'sub', 'sub/subsub']) # if we clone subdataset and query for value present in it and its kid clone_sub = clone.install('sub') assert_names(clone_sub.search('datalad'), ['.', 'subsub'], clone_sub.path) # Test 'and' for multiple search entries assert_equal(len(list(clone.search(['child', 'bids']))), 2) assert_equal(len(list(clone.search(['child', 'subsub']))), 1) assert_equal(len(list(clone.search(['bids', 'sub']))), 2) res = list(clone.search('.*', regex=True)) # with regex assert_equal(len(res), 3) # one per dataset # we do search, not match assert_equal(len(list(clone.search('randchild', regex=True))), 1) assert_equal(len(list(clone.search(['gr.nd', 'ch.ld'], regex=True))), 1) assert_equal(len(list(clone.search('randchil.', regex=True))), 1) assert_equal(len(list(clone.search('^randchild.*', regex=True))), 0) 
assert_equal(len(list(clone.search('^grandchild.*', regex=True))), 1) assert_equal(len(list(clone.search('grandchild'))), 1)
def test_plugin_call(path, dspath): # make plugins create_tree( path, { 'dlplugin_dummy.py': dummy_plugin, 'dlplugin_nodocs.py': nodocs_plugin, 'dlplugin_broken.py': broken_plugin, }) fake_dummy_spec = { 'dummy': {'file': opj(path, 'dlplugin_dummy.py')}, 'nodocs': {'file': opj(path, 'dlplugin_nodocs.py')}, 'broken': {'file': opj(path, 'dlplugin_broken.py')}, } with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec): with swallow_outputs() as cmo: plugin(showplugininfo=True) # hyphen spacing depends on the longest plugin name! # sorted # summary list generation doesn't actually load plugins for speed, # hence broken is not known to be broken here eq_(cmo.out, "broken [no synopsis] ({})\ndummy - real dummy ({})\nnodocs [no synopsis] ({})\n".format( fake_dummy_spec['broken']['file'], fake_dummy_spec['dummy']['file'], fake_dummy_spec['nodocs']['file'])) with swallow_outputs() as cmo: plugin(['dummy'], showpluginhelp=True) eq_(cmo.out.rstrip(), "Usage: dummy(dataset, noval, withval='test')\n\nmydocstring") with swallow_outputs() as cmo: plugin(['nodocs'], showpluginhelp=True) eq_(cmo.out.rstrip(), "Usage: nodocs()\n\nThis plugin has no documentation") # loading fails, no docs assert_raises(ValueError, plugin, ['broken'], showpluginhelp=True) # assume this most obscure plugin name is not used assert_raises(ValueError, plugin, '32sdfhvz984--^^') # broken plugin argument, must match Python keyword arg # specs assert_raises(ValueError, plugin, ['dummy', '1245']) def fake_is_installed(*args, **kwargs): return True with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec), \ patch('datalad.distribution.dataset.Dataset.is_installed', return_value=True): # does not trip over unsupported argument, they get filtered out, because # we carry all kinds of stuff with swallow_logs(new_level=logging.WARNING) as cml: res = list(plugin(['dummy', 'noval=one', 'obscure=some'])) assert_status('ok', res) cml.assert_logged( msg=".*Ignoring plugin argument\\(s\\).*obscure.*, not supported by plugin.*", regex=True, level='WARNING') # fails on missing positional arg assert_raises(TypeError, plugin, ['dummy']) # positional and kwargs actually make it into the plugin res = list(plugin(['dummy', 'noval=one', 'withval=two']))[0] eq_('one', res['args']['noval']) eq_('two', res['args']['withval']) # kwarg defaults are preserved res = list(plugin(['dummy', 'noval=one']))[0] eq_('test', res['args']['withval']) # repeated specification yields list input res = list(plugin(['dummy', 'noval=one', 'noval=two']))[0] eq_(['one', 'two'], res['args']['noval']) # can do the same thing while bypassing argument parsing for calls # from within python, and even preserve native python dtypes res = list(plugin(['dummy', ('noval', 1), ('noval', 'two')]))[0] eq_([1, 'two'], res['args']['noval']) # and we can further simplify in this case by passing lists right # away res = list(plugin(['dummy', ('noval', [1, 'two'])]))[0] eq_([1, 'two'], res['args']['noval']) # dataset arg handling # run plugin that needs a dataset where there is none with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec): ds = None with chpwd(dspath): assert_raises(ValueError, plugin, ['dummy', 'noval=one']) # create a dataset here, fixes the error ds = create() res = list(plugin(['dummy', 'noval=one']))[0] # gives dataset instance eq_(ds, res['args']['dataset']) # no do again, giving the dataset path # but careful, `dataset` is a proper argument res = list(plugin(['dummy', 'noval=one'], dataset=dspath))[0] eq_(ds, 
res['args']['dataset']) # however, if passed alongside the plugins args it also works res = list(plugin(['dummy', 'dataset={}'.format(dspath), 'noval=one']))[0] eq_(ds, res['args']['dataset']) # but if both are given, the proper args takes precedence assert_raises(ValueError, plugin, ['dummy', 'dataset={}'.format(dspath), 'noval=one'], dataset='rubbish')
def test_something(path, new_home): ds = Dataset(opj(path, 'ds')).create(force=True) ds.save() # catches unsupported argument combinations assert_raises(ValueError, ds.configuration, 'dump', spec='some') assert_raises(ValueError, ds.configuration, 'dump', scope='dataset') assert_raises(ValueError, ds.configuration, 'set', spec=('onlyname', )) assert_raises(ValueError, ds.configuration, 'set', spec='nosection=value') # we also get that from the internal helper from datalad.local.configuration import configuration as cfghelper assert_in_results( cfghelper('set', 'global', [('nosection', 'value')], {}), status='error', ) assert_raises(ValueError, ds.configuration, 'invalid') res = ds.configuration(result_renderer='disabled') assert_in_results(res, name='something.user.name', value='Jane Doe') # UTF handling assert_in_results(res, name=u'onemore.complicated の beast with.dot.findme', value='5.0') res = ds.configuration( 'set', spec='some.more=test', result_renderer='disabled', ) assert_in_results(res, name='some.more', value='test') # Python tuple specs # swallow outputs to be able to execise the result renderer with swallow_outputs(): res = ds.configuration( 'set', spec=[ ('some.more.still', 'test2'), # value is non-str -- will be converted ('lonely.val', 4) ], ) assert_in_results(res, name='some.more.still', value='test2') assert_in_results(res, name='lonely.val', value='4') assert_in_results( ds.configuration('get', spec='lonely.val'), status='ok', name='lonely.val', value='4', ) # remove something that does not exist in the specified scope assert_in_results(ds.configuration('unset', scope='dataset', spec='lonely.val', result_renderer='disabled', on_failure='ignore'), status='error') # remove something that does not exist in the specified scope assert_in_results(ds.configuration('unset', spec='lonely.val', result_renderer='disabled'), status='ok') assert_not_in('lonely.val', ds.config) # errors if done again assert_in_results(ds.configuration('unset', spec='lonely.val', result_renderer='disabled', on_failure='ignore'), status='error') # add a subdataset to test recursive operation subds = ds.create('subds') with swallow_outputs(): res = ds.configuration('set', spec='rec.test=done', recursive=True) assert_result_count( res, 2, name='rec.test', value='done', ) # exercise the result renderer with swallow_outputs() as cml: ds.configuration(recursive=True) # we get something on the subds with the desired markup assert_in('<ds>/subds:rec.test=done', cml.out)
def test_run_inputs_outputs(src, path): for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"), ("s0", "s1_1"), ("s0", "ss"), ("s0", )]: Dataset(op.join(*((src, ) + subds))).create(force=True) src_ds = Dataset(src).create(force=True) src_ds.add(".", recursive=True) ds = install(path, source=src, result_xfm='datasets', return_type='item-or-list') assert_false(ds.repo.file_has_content("input.dat")) assert_false(ds.repo.file_has_content("extra-input.dat")) # The specified inputs and extra inputs will be retrieved before the run. # (Use run_command() to access the extra_inputs argument.) list( run_command("cat {inputs} {inputs} >doubled.dat", dataset=ds, inputs=["input.dat"], extra_inputs=["extra-input.dat"])) ok_clean_git(ds.path) ok_(ds.repo.file_has_content("input.dat")) ok_(ds.repo.file_has_content("extra-input.dat")) ok_(ds.repo.file_has_content("doubled.dat")) with open(opj(path, "doubled.dat")) as fh: content = fh.read() assert_in("input", content) assert_not_in("extra-input", content) # Rerunning the commit will also get the input file. ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"]) assert_false(ds.repo.file_has_content("input.dat")) assert_false(ds.repo.file_has_content("extra-input.dat")) ds.rerun() ok_(ds.repo.file_has_content("input.dat")) ok_(ds.repo.file_has_content("extra-input.dat")) with swallow_logs(new_level=logging.WARN) as cml: ds.run("touch dummy", inputs=["not-there"]) assert_in("Input does not exist: ", cml.out) # Test different combinations of globs and explicit files. inputs = ["a.dat", "b.dat", "c.txt", "d.txt"] create_tree(ds.path, {i: i for i in inputs}) ds.add(".") ds.repo.copy_to(inputs, remote="origin") ds.repo.drop(inputs, options=["--force"]) test_cases = [(["*.dat"], ["a.dat", "b.dat"]), (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]), (["*"], inputs)] for idx, (inputs_arg, expected_present) in enumerate(test_cases): assert_false(any(ds.repo.file_has_content(i) for i in inputs)) ds.run("touch dummy{}".format(idx), inputs=inputs_arg) ok_(all(ds.repo.file_has_content(f) for f in expected_present)) # Globs are stored unexpanded by default. assert_in(inputs_arg[0], ds.repo.format_commit("%B")) ds.repo.drop(inputs, options=["--force"]) # --input can be passed a subdirectory. create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}}) ds.add("subdir") ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin") ds.repo.drop("subdir", options=["--force"]) ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")]) ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"])) # Inputs are specified relative to a dataset's subdirectory. ds.repo.drop(opj("subdir", "a"), options=["--force"]) with chpwd(opj(path, "subdir")): run("touch subdir-dummy1", inputs=["a"]) ok_(ds.repo.file_has_content(opj("subdir", "a"))) # --input=. runs "datalad get ." ds.run("touch dot-dummy", inputs=["."]) eq_(ds.repo.get_annexed_files(), ds.repo.get_annexed_files(with_content_only=True)) # On rerun, we get all files, even those that weren't in the tree at the # time of the run. create_tree(ds.path, {"after-dot-run": "after-dot-run content"}) ds.add(".") ds.repo.copy_to(["after-dot-run"], remote="origin") ds.repo.drop(["after-dot-run"], options=["--force"]) ds.rerun("HEAD^") ds.repo.file_has_content("after-dot-run") # --output will unlock files that are present. 
ds.repo.get("a.dat") ds.run("echo ' appended' >>a.dat", outputs=["a.dat"]) with open(opj(path, "a.dat")) as fh: eq_(fh.read(), "a.dat appended\n") # --output will remove files that are not present. ds.repo.drop(["a.dat", "d.txt"], options=["--force"]) ds.run("echo ' appended' >>a.dat", outputs=["a.dat"]) with open(opj(path, "a.dat")) as fh: eq_(fh.read(), " appended\n") # --input can be combined with --output. ds.repo.repo.git.reset("--hard", "HEAD~2") ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"]) with open(opj(path, "a.dat")) as fh: eq_(fh.read(), "a.dat appended\n") with swallow_logs(new_level=logging.DEBUG) as cml: with swallow_outputs(): ds.run("echo blah", outputs=["not-there"]) assert_in("Filtered out non-existing path: ", cml.out) ds.create('sub') ds.run("echo sub_orig >sub/subfile") ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"]) ds.drop("sub/subfile", check=False) ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"]) # --input/--output globs can be stored in expanded form. ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both") assert_in("a.dat", ds.repo.format_commit("%B")) assert_in("b.dat", ds.repo.format_commit("%B")) res = ds.rerun(report=True, return_type='item-or-list') eq_(res["run_info"]['inputs'], ["a.dat"]) eq_(res["run_info"]['outputs'], ["b.dat"]) # We install subdatasets to fully resolve globs. ds.uninstall("s0") assert_false(Dataset(op.join(path, "s0")).is_installed()) ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"]) ok_file_has_content(op.join(ds.path, "globbed-subds"), "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True) ds_ss = Dataset(op.join(path, "s0", "ss")) assert_false(ds_ss.is_installed()) ds.run("echo blah >{outputs}", outputs=["s0/ss/out"]) ok_(ds_ss.is_installed()) ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah", strip=True)
def test_rerun(path, nodspath): ds = Dataset(path).create() sub = ds.create('sub') probe_path = opj(sub.path, 'sequence') # run inside the dataset with chpwd(path), \ swallow_outputs(): ds.run('echo x$(cat sub/sequence) > sub/sequence') # command ran once, all clean ok_clean_git(ds.path) eq_('x\n', open(probe_path).read()) # now, for a rerun we can be anywhere, PWD and all are recorded # moreover, rerun must figure out which bits to unlock, even in # subdatasets with chpwd(nodspath), \ swallow_outputs(): ds.rerun() ok_clean_git(ds.path) # ran twice now eq_('xx\n', open(probe_path).read()) # Rerunning from a subdataset skips the command. _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message) eq_(ds.id, sub_info["dsid"]) assert_result_count(sub.rerun(return_type="list", on_failure="ignore"), 1, status="impossible", action="run", rerun_action="skip") eq_('xx\n', open(probe_path).read()) # Rerun fails with a dirty repo. dirt = opj(path, "dirt") with open(dirt, "w") as fh: fh.write("") assert_status('impossible', ds.rerun(on_failure="ignore")) remove(dirt) ok_clean_git(ds.path) # Make a non-run commit. with open(opj(path, "nonrun-file"), "w") as f: f.write("foo") ds.add("nonrun-file") # Now rerun the buried command. ds.rerun(revision="HEAD~", message="rerun buried") eq_('xxx\n', open(probe_path).read()) # Also check that the messasge override worked. eq_( ds.repo.format_commit("%B").splitlines()[0], "[DATALAD RUNCMD] rerun buried") # Or a range of commits, skipping non-run commits. ds.rerun(since="HEAD~3") eq_('xxxxx\n', open(probe_path).read()) # Or --since= to run all reachable commits. ds.rerun(since="") eq_('xxxxxxxxxx\n', open(probe_path).read()) # We can get back a report of what would happen rather than actually # rerunning anything. report = ds.rerun(since="", report=True, return_type="list") # Nothing changed. eq_('xxxxxxxxxx\n', open(probe_path).read()) assert_result_count(report, 1, rerun_action="skip") report[-1]["commit"] == ds.repo.get_hexsha() # If a file is dropped, we remove it instead of unlocking it. ds.drop(probe_path, check=False) with swallow_outputs(): ds.rerun() eq_('x\n', open(probe_path).read()) # If the history to rerun has a merge commit, we abort. ds.repo.checkout("HEAD~3", options=["-b", "topic"]) with open(opj(path, "topic-file"), "w") as f: f.write("topic") ds.add("topic-file") ds.repo.checkout("master") ds.repo.merge("topic") ok_clean_git(ds.path) assert_raises(IncompleteResultsError, ds.rerun)
def test_basics(path, nodspath): ds = Dataset(path).create() last_state = ds.repo.get_hexsha() # run inside the dataset with chpwd(path), \ swallow_outputs(): # provoke command failure with assert_raises(CommandError) as cme: ds.run('7i3amhmuch9invalid') # let's not speculate that the exit code is always 127 ok_(cme.code > 0) eq_(last_state, ds.repo.get_hexsha()) # now one that must work res = ds.run('touch empty', message='TEST') ok_clean_git(ds.path) assert_result_count(res, 2) # TODO 'state' is still untracked!!! assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty'), type='file') assert_result_count(res, 1, action='save', path=ds.path) commit_msg = ds.repo.repo.head.commit.message ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST')) # crude test that we have a record for the PWD assert_in('"pwd": "."', commit_msg) last_state = ds.repo.get_hexsha() # now run a command that will not alter the dataset res = ds.run('touch empty', message='NOOP_TEST') assert_status('notneeded', res) eq_(last_state, ds.repo.get_hexsha()) # We can also run the command via a single-item list because this is # what the CLI interface passes in for quoted commands. res = ds.run(['touch empty'], message='NOOP_TEST') assert_status('notneeded', res) # run outside the dataset, should still work but with limitations with chpwd(nodspath), \ swallow_outputs(): res = ds.run(['touch', 'empty2'], message='TEST') assert_status('ok', res) assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty2'), type='file') # running without a command is a noop with chpwd(path): with swallow_logs(new_level=logging.WARN) as cml: ds.run() assert_in("No command given", cml.out) # Simple sidecar message checks. ds.run(["touch", "dummy0"], message="sidecar arg", sidecar=True) assert_not_in('"cmd":', ds.repo.repo.head.commit.message) real_get = ds.config.get def mocked_get(key, default=None): if key == "datalad.run.record-sidecar": return True return real_get(key, default) with patch.object(ds.config, "get", mocked_get): ds.run(["touch", "dummy1"], message="sidecar config") assert_not_in('"cmd":', ds.repo.repo.head.commit.message)
def test_add_archive_content(path_orig, url, repo_path): with chpwd(repo_path): # TODO we need to be able to pass path into add_archive_content # We could mock but I mean for the API assert_raises(RuntimeError, add_archive_content, "nonexisting.tar.gz") # no repo yet repo = AnnexRepo(repo_path, create=True) assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz") # we can't add a file from outside the repo ATM assert_raises(FileNotInRepositoryError, add_archive_content, opj(path_orig, '1.tar.gz')) # Let's add first archive to the repo so we could test with swallow_outputs(): repo.add_urls([opj(url, '1.tar.gz')], options=["--pathdepth", "-1"]) for s in range(1, 5): repo.add_urls([opj(url, '%du/1.tar.gz' % s)], options=["--pathdepth", "-2"]) repo.commit("added 1.tar.gz") key_1tar = repo.get_file_key( '1.tar.gz') # will be used in the test later def d1_basic_checks(): ok_(exists('1')) ok_file_under_git('1', '1 f.txt', annexed=True) ok_file_under_git(opj('1', 'd', '1d'), annexed=True) ok_archives_caches(repo_path, 0) # and by default it just does it, everything goes to annex repo_ = add_archive_content('1.tar.gz') eq_(repo.path, repo_.path) d1_basic_checks() # If ran again, should proceed just fine since the content is the same so no changes would be made really add_archive_content('1.tar.gz') # But that other one carries updated file, so should fail due to overwrite with assert_raises(RuntimeError) as cme: add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True) # TODO: somewhat not precise since we have two possible "already exists" # -- in caching and overwrite check assert_in("already exists", str(cme.exception)) # but should do fine if overrides are allowed add_archive_content(opj('1u', '1.tar.gz'), existing='overwrite', use_current_dir=True) add_archive_content(opj('2u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True) add_archive_content(opj('3u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True) add_archive_content(opj('4u', '1.tar.gz'), existing='archive-suffix', use_current_dir=True) # rudimentary test assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))), ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt']) whereis = repo.whereis(glob(opj(repo_path, '1', '1*'))) # they all must be the same assert (all([x == whereis[0] for x in whereis[1:]])) # and we should be able to reference it while under subdirectory subdir = opj(repo_path, 'subdir') with chpwd(subdir, mkdir=True): add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True) d1_basic_checks() # or we could keep relative path and also demand to keep the archive prefix # while extracting under original (annex root) dir add_archive_content(opj(pardir, '1.tar.gz'), add_archive_leading_dir=True) with chpwd(opj(repo_path, '1')): d1_basic_checks() with chpwd(repo_path): # test with excludes and renames and annex options add_archive_content('1.tar.gz', exclude=['d'], rename=['/ /_', '/^1/2'], annex_options="-c annex.largefiles=exclude=*.txt", delete=True) # no conflicts since new name ok_file_under_git('2', '1_f.txt', annexed=False) assert_false(exists(opj('2', 'd'))) assert_false(exists('1.tar.gz')) # delete was in effect # now test ability to extract within subdir with chpwd(opj(repo_path, 'd1'), mkdir=True): # Let's add first archive to the repo so we could test # named the same way but different content with swallow_outputs(): repo.add_urls([opj(url, 'd1', '1.tar.gz')], options=["--pathdepth", "-1"], cwd=getpwd()) # invoke under current subdir repo.commit("added 
1.tar.gz in d1") def d2_basic_checks(): ok_(exists('1')) ok_file_under_git('1', '2 f.txt', annexed=True) ok_file_under_git(opj('1', 'd2', '2d'), annexed=True) ok_archives_caches(repo.path, 0) add_archive_content('1.tar.gz') d2_basic_checks() # in manual tests ran into the situation of inability to obtain on a single run # a file from an archive which was coming from a dropped key. I thought it was # tested in custom remote tests, but I guess not sufficiently well enough repo.drop(opj('1', '1 f.txt')) # should be all kosher repo.get(opj('1', '1 f.txt')) ok_archives_caches(repo.path, 1, persistent=True) ok_archives_caches(repo.path, 0, persistent=False) repo.drop(opj('1', '1 f.txt')) # should be all kosher repo.drop(key_1tar, key=True) # is available from the URL -- should be kosher repo.get(opj('1', '1 f.txt')) # that what managed to not work # TODO: check if persistent archive is there for the 1.tar.gz # We should be able to drop everything since available online with swallow_outputs(): clean(dataset=repo.path) repo.drop(key_1tar, key=True) # is available from the URL -- should be kosher repo.drop(opj('1', '1 f.txt')) # should be all kosher repo.get(opj('1', '1 f.txt')) # and should be able to get it again # bug was that dropping didn't work since archive was dropped first repo.call_annex(["drop", "--all"]) # verify that we can't drop a file if archive key was dropped and online archive was removed or changed size! ;) repo.get(key_1tar, key=True) unlink(opj(path_orig, '1.tar.gz')) with assert_raises(CommandError) as e: repo.drop(key_1tar, key=True) assert_equal(e.kwargs['stdout_json'][0]['success'], False) assert_result_values_cond( e.kwargs['stdout_json'], 'note', lambda x: '(Use --force to override this check, or adjust numcopies.)' in x) assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))
def test_basic_dsmeta(path): ds = Dataset(path).create() ok_clean_git(path) # ensure clean slate res = ds.metadata(reporton='datasets') assert_result_count(res, 1) _assert_metadata_empty(res[0]['metadata']) # init res = ds.metadata(init=['tag1', 'tag2'], apply2global=True) eq_(res[0]['metadata']['tag'], ['tag1', 'tag2']) # init again does nothing res = ds.metadata(init=['tag3'], apply2global=True) eq_(res[0]['metadata']['tag'], ['tag1', 'tag2']) # reset whole key ds.metadata(reset=['tag'], apply2global=True) res = ds.metadata(reporton='datasets') assert_result_count(res, 1) _assert_metadata_empty(res[0]['metadata']) # add something arbitrary res = ds.metadata(add=dict(dtype=['heavy'], readme=['short', 'long']), apply2global=True, on_failure='ignore') # fails due to unknown keys assert_status('error', res) res = ds.metadata(add=dict(dtype=['heavy'], readme=['short', 'long']), define_key=dict(dtype='is_a_datatype', readme='is_readme_content'), apply2global=True) eq_(res[0]['metadata']['dtype'], 'heavy') # sorted! eq_(res[0]['metadata']['readme'], ['long', 'short']) # check it reports common keys with swallow_outputs() as cmo: ds.metadata(show_keys=True) assert_in('license', cmo.out) # supply key definitions, no need for apply2global res = ds.metadata(define_key=dict(mykey='truth')) eq_(res[0]['metadata']['definition']['mykey'], u'truth') with swallow_outputs() as cmo: ds.metadata(show_keys=True) assert_in('mykey: truth (dataset: {})'.format(ds.path), cmo.out) # re-supply different key definitions -> error res = ds.metadata(define_key=dict(mykey='lie'), on_failure='ignore') assert_result_count( res, 1, status='error', message=("conflicting definition for key '%s': '%s' != '%s'", "mykey", "lie", "truth")) res = ds.metadata(define_key=dict(otherkey='altfact'), ) eq_(res[0]['metadata']['definition']['otherkey'], 'altfact') # 'definition' is a regular key, we can remove items res = ds.metadata(remove=dict(definition=['mykey']), apply2global=True) assert_dict_equal( res[0]['metadata']['definition'], { 'otherkey': u'altfact', 'readme': u'is_readme_content', 'dtype': u'is_a_datatype' }) res = ds.metadata(remove=dict(definition=['otherkey', 'readme', 'dtype']), apply2global=True) # when there are no items left, the key vanishes too assert ('definition' not in res[0]['metadata']) # we still have metadata, so there is a DB file assert (res[0]['metadata']) db_path = opj(ds.path, '.datalad', 'metadata', 'dataset.json') assert (exists(db_path)) ok_clean_git(ds.path) # but if we remove it, the file is gone res = ds.metadata(reset=['readme', 'dtype'], apply2global=True) eq_(res[0]['metadata'], {}) assert (not exists(db_path)) ok_clean_git(ds.path)
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    subds = ds.create('sub', force=True)
    subsubds = subds.create('subsub', force=True)
    # aggregate from bottom to top, guess native data, no compacting of graph
    # should yield 6 meta data sets, one implicit, and one native per dataset
    # and a second native set for the topmost dataset
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    # now only ask the top superdataset, no recursion, just reading from the cache
    meta = get_metadata(
        ds, guess_type=False, ignore_subdatasets=False, ignore_cache=False)
    assert_equal(len(meta), 10)
    # same schema
    assert_equal(
        10,
        sum([s.get('@context', {'@vocab': None})['@vocab'] == 'http://schema.org/'
             for s in meta]))
    # three different IDs
    assert_equal(3, len(set([s.get('@id') for s in meta])))
    # and we know about all three datasets
    for name in ('mother_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(sum([s.get('name', None) == assure_unicode(name) for s in meta]))
    assert_equal(
        # first implicit, then two natives, then aggregate
        meta[3]['dcterms:hasPart']['@id'],
        subds.id)
    success = False
    for m in meta:
        p = m.get('dcterms:hasPart', {})
        if p.get('@id', None) == subsubds.id:
            assert_equal(opj('sub', 'subsub'), p.get('location', None))
            success = True
    assert_true(success)

    # save the toplevel dataset only (see below)
    ds.save('with aggregated meta data', all_changes=True)

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'), source=ds.path)
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh meta data, the implicit one for the top-most datasets should
    # differ, but the rest not
    clonemeta = get_metadata(
        clone, guess_type=False, ignore_subdatasets=False, ignore_cache=False)

    # make sure the implicit md for the topmost come first
    assert_equal(clonemeta[0]['@id'], clone.id)
    assert_equal(clonemeta[0]['@id'], ds.id)
    assert_equal(clone.repo.get_hexsha(), ds.repo.get_hexsha())
    assert_equal(clonemeta[0]['version'], ds.repo.get_hexsha())
    # all but the implicit is identical
    assert_equal(clonemeta[1:], meta[1:])
    # the implicit md of the clone should list a dataset ID for its subds,
    # although it has not been obtained!
    assert_equal(
        clonemeta[3]['dcterms:hasPart']['@id'],
        subds.id)

    # now obtain a subdataset in the clone and the IDs should be updated
    clone.install('sub')
    partial = get_metadata(clone, guess_type=False, ignore_cache=True)
    # ids don't change
    assert_equal(partial[0]['@id'], clonemeta[0]['@id'])
    # datasets are properly connected
    assert_equal(partial[1]['dcterms:hasPart']['@id'],
                 partial[2]['@id'])

    # query smoke test
    if os.environ.get('DATALAD_TESTS_NONETWORK'):
        raise SkipTest

    assert_equal(len(list(clone.search('mother'))), 1)
    assert_equal(len(list(clone.search('MoTHER'))), 1)  # case insensitive

    child_res = list(clone.search('child'))
    assert_equal(len(child_res), 2)

    # little helper to match names
    def assert_names(res, names, path=clone.path):
        assert_equal(list(map(itemgetter(0), res)),
                     [opj(path, n) for n in names])
    # should yield (location, report) tuples
    assert_names(child_res, ['sub', 'sub/subsub'])

    # result should be identical to invoking search from api
    # and search_ should spit out locations
    with swallow_outputs() as cmo:
        res = list(search_('child', dataset=clone))
        assert_equal(res, child_res)
        assert_in(res[0][0], cmo.out)
    # and overarching search_ just for smoke testing of processing outputs
    # and not puking (e.g. under PY3)
    with swallow_outputs() as cmo:
        assert list(search_('.', regex=True, dataset=clone))
        assert cmo.out

    # test searching among specified properties only
    assert_names(clone.search('i', search='name'), ['sub', 'sub/subsub'])
    assert_names(clone.search('i', search='keywords'), ['.'])
    # case shouldn't matter
    assert_names(clone.search('i', search='Keywords'), ['.'])
    assert_names(clone.search('i', search=['name', 'keywords']),
                 ['.', 'sub', 'sub/subsub'])

    # without report_matched, we are getting none of the fields
    assert(all([not x for x in map(itemgetter(1), child_res)]))
    # but we would get all if asking for '*'
    assert(all([len(x) >= 9
                for x in map(itemgetter(1),
                             list(clone.search('child', report='*')))]))
    # but we would get only the matching name if we ask for report_matched
    assert_equal(
        set(map(lambda x: tuple(x[1].keys()),
                clone.search('child', report_matched=True))),
        set([('name',)])
    )
    # and the additional field we might have asked with report
    assert_equal(
        set(map(lambda x: tuple(sorted(x[1].keys())),
                clone.search('child', report_matched=True,
                             report=['schema:type']))),
        set([('name', 'schema:type')])
    )
    # and if we ask report to be 'empty', we should get no fields
    child_res_empty = list(clone.search('child', report=''))
    assert_equal(len(child_res_empty), 2)
    assert_equal(
        set(map(lambda x: tuple(x[1].keys()), child_res_empty)),
        set([tuple()])
    )

    # more tests on returned paths:
    assert_names(clone.search('datalad'), ['.', 'sub', 'sub/subsub'])
    # if we clone subdataset and query for value present in it and its kid
    clone_sub = clone.install('sub')
    assert_names(clone_sub.search('datalad'), ['.', 'subsub'], clone_sub.path)

    # Test 'and' for multiple search entries
    assert_equal(len(list(clone.search(['child', 'bids']))), 2)
    assert_equal(len(list(clone.search(['child', 'subsub']))), 1)
    assert_equal(len(list(clone.search(['bids', 'sub']))), 2)

    res = list(clone.search('.*', regex=True))  # with regex
    assert_equal(len(res), 3)  # one per dataset

    # we do search, not match
    assert_equal(len(list(clone.search('randchild', regex=True))), 1)
    assert_equal(len(list(clone.search(['gr.nd', 'ch.ld'], regex=True))), 1)
    assert_equal(len(list(clone.search('randchil.', regex=True))), 1)
    assert_equal(len(list(clone.search('^randchild.*', regex=True))), 0)
    assert_equal(len(list(clone.search('^grandchild.*', regex=True))), 1)
    assert_equal(len(list(clone.search('grandchild'))), 1)
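# A minimal sketch of the aggregate-then-query pattern tested above: metadata
# is aggregated recursively into the superdataset, so that a fresh clone can
# be searched without installing any subdataset first.  Only calls used in
# test_aggregation appear here; the paths and the search term are made up.
def _sketch_aggregate_and_search(superds_path, clone_path):
    ds = Dataset(superds_path)
    aggregate_metadata(ds, guess_native_type=True, recursive=True)
    ds.save('aggregated metadata', all_changes=True)
    clone = install(clone_path, source=ds.path)
    # search yields (location, report) tuples from the cached metadata
    return list(clone.search('some-term'))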
def test_rerun_onto(path):
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")
def test_rerun_onto(path):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status('impossible',
                      ds.rerun(onto="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    # Make sure we have more than one commit. The one commit case is checked
    # elsewhere.
    ds.repo.commit(msg="noop commit", options=["--allow-empty"])

    grow_file = op.join(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout(DEFAULT_BRANCH)
    with swallow_outputs():
        ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    ok_(all(r["state"] == "clean" for r in ds.diff(fr="HEAD", to="static")))
    for revrange in ["..static", "static.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha(DEFAULT_BRANCH))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout(DEFAULT_BRANCH)
    with swallow_outputs():
        ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in [".." + DEFAULT_BRANCH, DEFAULT_BRANCH + ".."]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout(DEFAULT_BRANCH)
    with swallow_outputs():
        ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    ok_(all(r["state"] == "clean"
            for r in ds.diff(fr=DEFAULT_BRANCH, to="from-base")))
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout(DEFAULT_BRANCH)
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")
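# A small sketch of the `since`/`onto` semantics exercised by the two
# test_rerun_onto variants above: replay every run commit reachable from HEAD
# onto the parent of the first one, on a new branch.  The branch name is made
# up; the keyword arguments are the ones used in the tests.
def _sketch_replay_on_branch(ds, branch="replayed"):
    # since="" means "all reachable commits"; onto="" means "start from the
    # parent of the first commit that carries a run record"
    ds.rerun(since="", onto="", branch=branch)
    return ds.repo.get_active_branch()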
def test_save_obscure_name(path):
    ds = Dataset(path).create(force=True)
    fname = OBSCURE_FILENAME
    # Just check that we don't fail with a unicode error.
    with swallow_outputs():
        ds.save(path=fname, result_renderer="default")
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.format_commit("%B"))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files
    # that would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other
            # config file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
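# A compact sketch of the capture-and-replay loop that test_rerun drives:
# `run` records the command in the commit message and `rerun` re-executes it,
# committing any new output.  The shell command, file name, and message are
# illustrative only.
def _sketch_run_then_rerun(ds):
    ds.run('echo x$(cat counter 2>/dev/null) > counter', message='grow counter')
    ds.rerun()          # re-execute the most recent recorded command
    ds.rerun(since="")  # replay every run commit reachable from HEAD
    return open(op.join(ds.path, 'counter')).read()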
def check_create_obscure(create_kwargs, path):
    with chpwd(path):
        with swallow_outputs():
            ds = create(result_renderer="default", **create_kwargs)
    ok_(ds.is_installed())
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    ds.rerun()
    eq_('x\n', open(probe_path).read())

    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)