def test_diff_rsync_syntax(path):
    # Checks how diff() treats a subdataset path given with and without a
    # trailing path separator, and a path constraint inside the subdataset.

    # three nested datasets
    top = Dataset(path).create()
    mid = top.create('sub')
    leaf = mid.create(Path('subdir', 'deep'))

    def _assert_both_subdatasets(results):
        # both subdatasets are reported, but no file record that has the
        # nested one as its parent dataset
        assert_result_count(results, 2, type='dataset')
        assert_result_count(results, 1, type='dataset', path=mid.path)
        assert_result_count(results, 1, type='dataset', path=leaf.path)
        assert_result_count(results, 0, type='file', parentds=leaf.path)

    # plain path: we only get a single result, the subdataset in question
    only_sub = top.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    assert_result_count(only_sub, 1)
    assert_result_count(only_sub, 1, type='dataset', path=mid.path)

    # now with "peek inside the dataset" syntax (trailing separator)
    peek_path = 'sub' + os.sep
    peeked = top.diff(fr=PRE_INIT_COMMIT_SHA, path=peek_path)
    _assert_both_subdatasets(peeked)

    # if we point to the subdir in 'sub' the reporting wrt the nested
    # dataset doesn't change. It is merely a path constraint within the
    # queried subdataset, but because the nested dataset is still
    # underneath it, nothing changes
    constrained = top.diff(
        fr=PRE_INIT_COMMIT_SHA, path=op.join('sub', 'subdir'))
    _assert_both_subdatasets(constrained)
    # but the rest is different (e.g. all the stuff in .datalad is gone)
    neq_(peeked, constrained)

    # just for completeness, we get more when going full recursive
    everything = top.diff(
        fr=PRE_INIT_COMMIT_SHA, recursive=True, path=peek_path)
    assert len(peeked) < len(everything)
def test_rerun_onto(path):
    # Checks where rerun() leaves the repository (branch, commit) for
    # different combinations of `revision`, `since`, `onto` and `branch`.
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    # Read through a context manager so the file handle is closed right
    # away instead of being left to garbage collection.
    with open(grow_file) as grow_fobj:
        eq_('xx\n', grow_fobj.read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))
def test_diff_nods(path, otherpath):
    # diff() must yield an error result for a path outside the reference
    # dataset, before and after this test creates a dataset there.
    ds = Dataset(path).create()

    # no dataset has been created at `otherpath` by this test yet
    before = ds.diff(path=otherpath, on_failure='ignore')
    assert_result_count(
        before, 1,
        status='error',
        message='path not underneath this dataset')

    # now an unrelated dataset lives at that location
    otherds = Dataset(otherpath).create()
    after = ds.diff(path=otherpath, on_failure='ignore')
    # message is a (format string, args...) tuple
    expected_msg = (
        'dataset containing given paths is not underneath the '
        'reference dataset %s: %s',
        ds,
        otherds.path,
    )
    assert_result_count(
        after, 1,
        path=otherds.path,
        status='error',
        message=expected_msg)
def test_diff_nonexistent_ref_unicode(path):
    # A non-ASCII `to` reference must come back as a single "impossible"
    # result for the dataset.
    ds = Dataset(path).create()
    results = ds.diff(fr="HEAD", to=u"β", on_failure="ignore")
    assert_result_count(
        results, 1,
        path=ds.path,
        status="impossible")
def test_no_worktree_impact_false_deletions(path):
    # Diffing two commits of the default branch while a different branch is
    # checked out must not report any deleted files.
    ds = Dataset(path).create()
    # create a branch that has no new content
    ds.repo.call_git(['checkout', '-b', 'test'])
    # place two successive commits with file additions into the default
    # branch
    ds.repo.call_git(['checkout', DEFAULT_BRANCH])
    for fname, content in (('identical', 'should be'), ('new', 'yes')):
        (ds.pathobj / fname).write_text(content)
        ds.save()
    # now perform a diff for the last commit, there is one file that
    # remained identical
    ds.repo.call_git(['checkout', 'test'])
    previous = DEFAULT_BRANCH + '~1'
    res = ds.diff(fr=previous, to=DEFAULT_BRANCH, result_renderer=None)
    # under no circumstances can there be any reports on deleted files
    # because we never deleted anything
    assert_result_count(res, 0, state='deleted')
    # the identical file must be reported clean
    unchanged = str(ds.pathobj / 'identical')
    assert_result_count(res, 1, state='clean', path=unchanged)
def test_diff_nods(path, otherpath):
    # Querying a path that is not underneath the reference dataset must
    # produce exactly one error result, with or without a dataset there.
    ds = Dataset(path).create()

    # first query: this test has not created a dataset at `otherpath` yet
    res_no_ds = ds.diff(path=otherpath, on_failure='ignore')
    assert_result_count(
        res_no_ds, 1,
        status='error', message='path not underneath this dataset')

    # second query: a separate dataset now exists at `otherpath`
    otherds = Dataset(otherpath).create()
    res_other_ds = ds.diff(path=otherpath, on_failure='ignore')
    # expected message is a tuple of format string and its arguments
    msg = (
        'dataset containing given paths is not underneath the '
        'reference dataset %s: %s',
        ds, otherds.path)
    assert_result_count(
        res_other_ds, 1,
        path=otherds.path, status='error', message=msg)
def test_diff_rsync_syntax(path):
    # Checks how diff() treats a subdataset path given with and without a
    # trailing path separator.

    # three nested datasets
    top = Dataset(path).create()
    mid = top.create('sub')
    leaf = mid.create('deep')

    # plain path: we only get a single result, the subdataset in question
    only_sub = top.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    assert_result_count(only_sub, 1)
    assert_result_count(only_sub, 1, type='dataset', path=mid.path)

    # now with "peek inside the dataset" syntax (trailing separator)
    peek_path = 'sub' + os.sep
    peeked = top.diff(fr=PRE_INIT_COMMIT_SHA, path=peek_path)
    # we get both subdatasets, but nothing else inside the nested one
    assert_result_count(peeked, 2, type='dataset')
    for subdataset in (mid, leaf):
        assert_result_count(
            peeked, 1, type='dataset', path=subdataset.path)
    assert_result_count(peeked, 0, type='file', parentds=leaf.path)

    # just for completeness, we get more when going full recursive
    everything = top.diff(
        fr=PRE_INIT_COMMIT_SHA, recursive=True, path=peek_path)
    assert len(peeked) < len(everything)
def test_diff_fr_none_one_get_content_annexinfo_call(path):
    # With fr=None, diff() should call AnnexRepo.get_content_annexinfo()
    # exactly once.
    from datalad.support.annexrepo import AnnexRepo
    ds = Dataset(path).create()
    (ds.pathobj / "foo").write_text("foo")
    ds.save()
    # get_content_annexinfo() is expensive. If fr=None, we should
    # only need to call it once.
    with patch.object(AnnexRepo, "get_content_annexinfo") as gca:
        # only the call count on the mock matters; the results are not
        # inspected, so they are not bound to a name
        ds.diff(fr=None, to="HEAD", annex="all", result_renderer=None)
        eq_(gca.call_count, 1)
def test_diff_rsync_syntax(path):
    # A bare subdataset path yields just the subdataset record; the same
    # path with a trailing separator also reports the nested subdataset.

    # three nested datasets
    superds = Dataset(path).create()
    child = superds.create('sub')
    grandchild = child.create('deep')

    shallow = superds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    # we only get a single result, the subdataset in question
    assert_result_count(shallow, 1)
    assert_result_count(shallow, 1, type='dataset', path=child.path)

    # now with "peek inside the dataset" syntax
    with_sep = 'sub' + os.sep
    looked_into = superds.diff(fr=PRE_INIT_COMMIT_SHA, path=with_sep)
    # we get both subdatasets, but nothing else inside the nested one
    assert_result_count(looked_into, 2, type='dataset')
    assert_result_count(looked_into, 1, type='dataset', path=child.path)
    assert_result_count(
        looked_into, 1, type='dataset', path=grandchild.path)
    assert_result_count(
        looked_into, 0, type='file', parentds=grandchild.path)

    # just for completeness, we get more when going full recursive
    fully_recursive = superds.diff(
        fr=PRE_INIT_COMMIT_SHA, recursive=True, path=with_sep)
    assert len(looked_into) < len(fully_recursive)
def test_diff_recursive(path):
    # diff() on a superdataset with one subdataset, with and without
    # recursion, across revision ranges and for untracked content.
    ds = Dataset(path).create()
    sub = ds.create('sub')
    last_change = 'HEAD~1..HEAD'
    onefile = opj(ds.path, 'onefile')
    twofile = opj(sub.path, 'twofile')

    def _assert_single(results, state, where, kind):
        # exactly one diff record with this state, path and type
        assert_result_count(
            results, 1,
            action='diff', state=state, path=where, type=kind)

    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision=last_change)
    _assert_single(res, 'added', sub.path, 'dataset')

    # now recursive
    res = ds.diff(recursive=True, revision=last_change)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(
        res, 1,
        action='diff', state='added',
        path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to the parent and one to the subdataset
    create_tree(
        ds.path,
        {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    _assert_single(res, 'untracked', onefile, 'file')
    _assert_single(res, 'modified', sub.path, 'dataset')
    _assert_single(res, 'untracked', twofile, 'file')

    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)

    # look at the last change, only one file was added
    res = ds.diff(revision=last_change)
    assert_result_count(res, 1)
    _assert_single(res, 'added', onefile, 'file')

    # now the exact same thing with recursion, must not be different from
    # the call above
    res = ds.diff(recursive=True, revision=last_change)
    assert_result_count(res, 1)
    # last change in parent
    _assert_single(res, 'added', onefile, 'file')

    # one further back brings in the modified subdataset, and the added
    # file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    _assert_single(res, 'added', onefile, 'file')
    _assert_single(res, 'added', twofile, 'file')
    _assert_single(res, 'modified', sub.path, 'dataset')
def test_diff(path, norepo):
    # Walks a single dataset through added, modified, staged and untracked
    # states and checks what diff() reports at each step.
    with chpwd(norepo):
        assert_raises(NoDatasetArgumentFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    new_file = op.join(ds.path, 'new')
    deep_dir = op.join(ds.path, 'deep')

    # reports stupid revision input
    bad_ref = ds.diff(fr='WTF', on_failure='ignore')
    assert_result_count(
        bad_ref, 1,
        status='impossible',
        message="Git reference 'WTF' invalid")

    # no diff
    assert_result_count(_dirty_results(ds.diff()), 0)
    assert_result_count(_dirty_results(ds.diff(fr='HEAD')), 0)
    # bogus path makes no difference
    assert_result_count(
        _dirty_results(ds.diff(path='THIS', fr='HEAD')), 0)

    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    res = _dirty_results(ds.diff(fr='HEAD~1'))
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=new_file, state='added')
    # we can also find the diff without going through the dataset
    # explicitly
    with chpwd(ds.path):
        from_cwd = _dirty_results(diff(fr='HEAD~1'))
        assert_result_count(
            from_cwd, 1, action='diff', path=new_file, state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(ds.diff()), 0)

    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=new_file, state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff(path='otherpath'), 0)
    # giving the right path must work though
    assert_result_count(
        ds.diff(path='new'), 1,
        action='diff', path=new_file, state='modified')

    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(ds.diff()), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(ds.diff()), 0)

    # untracked stuff
    create_tree(
        ds.path,
        {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory', path=deep_dir)
    # report of individual files is also possible
    assert_result_count(
        ds.diff(untracked='all'), 2, state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        ds.diff(path='deep'), 1,
        state='untracked', path=deep_dir, type='directory')
    assert_result_count(
        ds.diff(path='deep'), 1, state='untracked', path=deep_dir)

    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(
        ds.diff(), 1,
        state='untracked',
        path=op.join(ds.path, 'deep', 'down'),
        type='file')
def test_path_diff(_path, linkpath):
    # Checks that diff() yields the same records for equivalent path
    # specifications, and how symlinks and recursion limits are reported.

    # do the setup on the real path, not the symlink, to have its
    # bugs not affect this test of status()
    ds = get_deeply_nested_structure(str(_path))
    path = _path
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path)
    if not on_windows:
        # TODO test should also be has_symlink_capability(), but
        # something in the repo base class is not behaving yet
        # check the premise of this test
        assert ds.pathobj != ds.repo.pathobj

    # keyword arguments shared by most queries below
    full = dict(recursive=True, annex='all')

    plain_recursive = ds.diff(**full)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for res in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        intended_link = 'link2' in text_type(res['path'])
        assert (res['type'] == 'symlink') == intended_link, res
        # every item must report its parent dataset
        assert_in('parentds', res)

    # bunch of smoke tests, in this order:
    # - query of '.' is same as no path
    # - duplicate paths do not change things
    # - neither do nested paths
    for spec in ('.', ['.', '.'], ['.', 'subds_modified']):
        eq_(plain_recursive, ds.diff(path=spec, **full))

    # when invoked in a subdir of a dataset it still reports on the full
    # thing just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = diff(**full)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive, ds.diff(path=ds.path, **full))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join(
        'subds_modified',
        'subds_lvl1_modified',
        u'{}_directory_untracked'.format(OBSCURE_FILENAME))
    apathobj = ds.pathobj / rpath
    apath = text_type(apathobj)
    for p in (rpath, apath, None):
        if p is not None:
            res = ds.diff(path=p, **full)
        else:
            # change into the realpath of the dataset and
            # query with an explicit path
            with chpwd(ds.path):
                res = ds.diff(path=op.join('.', rpath), **full)
        assert_result_count(
            res, 1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    assert_result_count(ds.diff(recursive=True), 1, path=apath)
    # limiting recursion will exclude this particular path
    limited = ds.diff(recursive=True, recursion_limit=1)
    assert_result_count(limited, 0, path=apath)
    # negative limit is unlimited limit
    negative_limit = ds.diff(recursive=True, recursion_limit=-1)
    no_limit = ds.diff(recursive=True)
    eq_(negative_limit, no_limit)
def test_path_diff(_path, linkpath):
    # Checks that diff() yields the same records for equivalent path
    # specifications, and how symlinks and recursion limits are reported.

    # do the setup on the real path, not the symlink, to have its
    # bugs not affect this test of status()
    ds = get_deeply_nested_structure(str(_path))
    path = _path
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path)
    if has_symlink_capability():
        # premise of this test: dataset and repo paths differ
        assert ds.pathobj != ds.repo.pathobj

    # keyword arguments shared by the queries below
    quiet = dict(recursive=True, result_renderer=None)
    full = dict(annex='all', **quiet)

    plain_recursive = ds.diff(**full)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for res in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        intended_link = 'link2' in str(res['path'])
        assert (res['type'] == 'symlink') == intended_link, res
        # every item must report its parent dataset
        assert_in('parentds', res)

    # bunch of smoke tests
    # query of '.' is same as no path; duplicate paths do not change things
    for spec in ('.', ['.', '.']):
        eq_(plain_recursive, ds.diff(path=spec, **full))
    # neither do nested paths
    regressed_git = "2.24.0" <= ds.repo.git_version < "2.25.0"
    if not regressed_git:
        # Release 2.24.0 contained a regression that was fixed with
        # 072a231016 (2019-12-10).
        eq_(plain_recursive,
            ds.diff(path=['.', 'subds_modified'], **full))

    # when invoked in a subdir of a dataset it still reports on the full
    # thing just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = diff(**full)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive, ds.diff(path=ds.path, **full))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join(
        'subds_modified',
        'subds_lvl1_modified',
        u'{}_directory_untracked'.format(OBSCURE_FILENAME))
    apathobj = ds.pathobj / rpath
    apath = str(apathobj)
    for p in (rpath, apath, None):
        if p is not None:
            res = ds.diff(path=p, **full)
        else:
            # change into the realpath of the dataset and
            # query with an explicit path
            with chpwd(ds.path):
                res = ds.diff(path=op.join('.', rpath), **full)
        assert_result_count(
            res, 1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    assert_result_count(ds.diff(**quiet), 1, path=apath)
    # limiting recursion will exclude this particular path
    limited = ds.diff(recursion_limit=1, **quiet)
    assert_result_count(limited, 0, path=apath)
    # negative limit is unlimited limit
    negative_limit = ds.diff(recursion_limit=-1, **quiet)
    no_limit = ds.diff(**quiet)
    eq_(negative_limit, no_limit)
def test_diff_recursive(path):
    # diff() on a superdataset with one subdataset, with and without
    # recursion, across commit ranges and for untracked content.
    ds = Dataset(path).create()
    sub = ds.create('sub')
    onefile = op.join(ds.path, 'onefile')
    twofile = op.join(sub.path, 'twofile')

    def _diff(**kwargs):
        # every query in this test disables result rendering
        return ds.diff(result_renderer=None, **kwargs)

    def _assert_single(results, state, where, kind):
        # exactly one diff record with this state, path and type
        assert_result_count(
            results, 1,
            action='diff', state=state, path=where, type=kind)

    # look at the last change, and confirm a dataset was added
    res = _diff(fr=DEFAULT_BRANCH + '~1', to=DEFAULT_BRANCH)
    _assert_single(res, 'added', sub.path, 'dataset')

    # now recursive
    res = _diff(recursive=True, fr=DEFAULT_BRANCH + '~1', to=DEFAULT_BRANCH)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(
        res, 1,
        action='diff', state='added',
        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to the parent and one to the subdataset
    create_tree(
        ds.path,
        {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = _diff(recursive=True, untracked='all')
    assert_result_count(_dirty_results(res), 3)
    _assert_single(res, 'untracked', onefile, 'file')
    _assert_single(res, 'modified', sub.path, 'dataset')
    _assert_single(res, 'untracked', twofile, 'file')

    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)

    if ds.repo.is_managed_branch():
        head_ref = DEFAULT_BRANCH
    else:
        head_ref = 'HEAD'

    # look at the last change, only one file was added
    res = _diff(fr=head_ref + '~1', to=head_ref)
    assert_result_count(_dirty_results(res), 1)
    _assert_single(res, 'added', onefile, 'file')

    # now the exact same thing with recursion, must not be different from
    # the call above
    res = _diff(recursive=True, fr=head_ref + '~1', to=head_ref)
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    _assert_single(res, 'added', onefile, 'file')

    if ds.repo.is_managed_branch():
        raise SkipTest(
            "Test assumption broken: https://github.com/datalad/datalad/issues/3818"
        )
    # one further back brings in the modified subdataset, and the added
    # file within it
    res = _diff(recursive=True, fr=head_ref + '~2', to=head_ref)
    assert_result_count(_dirty_results(res), 3)
    _assert_single(res, 'added', onefile, 'file')
    _assert_single(res, 'added', twofile, 'file')
    _assert_single(res, 'modified', sub.path, 'dataset')
def test_diff(path, norepo):
    # Walks a single dataset through added, modified, staged and untracked
    # states and checks what diff() reports at each step.
    with chpwd(norepo):
        assert_raises(NoDatasetFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    new_file = op.join(ds.path, 'new')
    deep_dir = op.join(ds.path, 'deep')

    def _diff(**kwargs):
        # every dataset query in this test disables result rendering
        return ds.diff(result_renderer=None, **kwargs)

    # reports stupid revision input
    assert_result_count(
        _diff(fr='WTF', on_failure='ignore'), 1,
        status='impossible',
        message="Git reference 'WTF' invalid")

    # no diff
    assert_result_count(_dirty_results(_diff()), 0)
    assert_result_count(_dirty_results(_diff(fr='HEAD')), 0)
    # bogus path makes no difference
    assert_result_count(
        _dirty_results(_diff(path='THIS', fr='HEAD')), 0)

    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)

    # pick the references to compare depending on the branch mode
    fr_base, to = (
        (DEFAULT_BRANCH, DEFAULT_BRANCH)
        if ds.repo.is_managed_branch()
        else ("HEAD", None)
    )

    res = _dirty_results(_diff(fr=fr_base + '~1', to=to))
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=new_file, state='added')
    # we can also find the diff without going through the dataset
    # explicitly
    with chpwd(ds.path):
        from_cwd = _dirty_results(
            diff(fr=fr_base + '~1', to=to, result_renderer=None))
        assert_result_count(
            from_cwd, 1, action='diff', path=new_file, state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(_diff()), 0)

    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(_diff())
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=new_file, state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(_diff(path='otherpath'), 0)
    # giving the right path must work though
    assert_result_count(
        _diff(path='new'), 1,
        action='diff', path=new_file, state='modified')

    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(_diff()), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(_diff()), 0)

    # untracked stuff
    create_tree(
        ds.path,
        {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(_diff())
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory', path=deep_dir)
    # report of individual files is also possible
    assert_result_count(
        _diff(untracked='all'), 2, state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(_diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        _diff(path='deep'), 1,
        state='untracked', path=deep_dir, type='directory')
    assert_result_count(
        _diff(path='deep'), 1, state='untracked', path=deep_dir)

    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(
        _diff(), 1,
        state='untracked',
        path=op.join(ds.path, 'deep', 'down'),
        type='file')
def test_diff(path, norepo):
    # Walks a single dataset through added, modified, staged and untracked
    # states and checks what diff() reports at each step.
    with chpwd(norepo):
        assert_status('impossible', diff(on_failure='ignore'))
    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    new_file = opj(ds.path, 'new')
    deep_dir = opj(ds.path, 'deep')
    down_file = opj(ds.path, 'deep', 'down')

    # reports stupid revision input
    bad_rev = ds.diff(revision='WTF', on_failure='ignore')
    assert_result_count(
        bad_rev, 1,
        status='impossible',
        message="fatal: bad revision 'WTF'")
    assert_result_count(ds.diff(), 0)
    # no diff
    assert_result_count(ds.diff(), 0)
    assert_result_count(ds.diff(revision='HEAD'), 0)
    # bogus path makes no difference
    assert_result_count(ds.diff(path='THIS', revision='HEAD'), 0)
    # comparing to a previous state we should get a diff in most cases
    # for this test, let's not care what exactly it is -- will do later
    assert len(ds.diff(revision='HEAD~1')) > 0

    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.add('.', to_git=True)
    ok_clean_git(ds.path)
    res = ds.diff(revision='HEAD~1')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=new_file, state='added')
    # we can also find the diff without going through the dataset
    # explicitly
    with chpwd(ds.path):
        assert_result_count(
            diff(revision='HEAD~1'), 1,
            action='diff', path=new_file, state='added')
    # no diff against HEAD
    assert_result_count(ds.diff(), 0)

    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    for diffy in (None, 'HEAD'):
        res = ds.diff(revision=diffy)
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, action='diff', path=new_file, state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff('otherpath'), 0)
    # giving the right path must work though
    assert_result_count(
        ds.diff('new'), 1,
        action='diff', path=new_file, state='modified')

    # stage changes
    ds.add('.', to_git=True, save=False)
    # no diff, because we staged the modification
    assert_result_count(ds.diff(), 0)
    # but we can get at it
    assert_result_count(
        ds.diff(staged=True), 1,
        action='diff', path=new_file, state='modified')
    # OR
    assert_result_count(
        ds.diff(revision='HEAD'), 1,
        action='diff', path=new_file, state='modified')
    ds.save()
    ok_clean_git(ds.path)

    # untracked stuff
    create_tree(
        ds.path,
        {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = ds.diff()
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory', path=deep_dir)
    # report of individual files is also possible
    assert_result_count(
        ds.diff(report_untracked='all'), 2,
        state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        ds.diff(path='deep'), 1,
        state='untracked', path=deep_dir, type='directory')
    assert_result_count(
        ds.diff(path='deep'), 1, state='untracked', path=deep_dir)

    # now we stage one of the two files in deep
    ds.add(opj('deep', 'down2'), to_git=True, save=False)
    # without any reference it will ignore the staged stuff and report the
    # remaining untracked file
    assert_result_count(
        ds.diff(), 1, state='untracked', path=down_file, type='file')
    res = ds.diff(staged=True)
    assert_result_count(
        res, 1, state='untracked', path=down_file, type='file')
    assert_result_count(
        res, 1,
        state='added', path=opj(ds.path, 'deep', 'down2'), type='file')
def test_diff_recursive(path):
    # Compares plain and recursive diff() reports for a superdataset with
    # one subdataset, over revision ranges and untracked content.
    ds = Dataset(path).create()
    sub = ds.create('sub')
    head_range = 'HEAD~1..HEAD'
    parent_file = opj(ds.path, 'onefile')
    sub_file = opj(sub.path, 'twofile')

    def _expect_one(results, **props):
        # exactly one record with action='diff' and the given properties
        assert_result_count(results, 1, action='diff', **props)

    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision=head_range)
    _expect_one(res, state='added', path=sub.path, type='dataset')

    # now recursive
    res = ds.diff(recursive=True, revision=head_range)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    _expect_one(
        res, state='added', path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to the parent and one to the subdataset
    new_content = {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded',
        },
    }
    create_tree(ds.path, new_content)
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    _expect_one(res, state='untracked', path=parent_file, type='file')
    _expect_one(res, state='modified', path=sub.path, type='dataset')
    _expect_one(res, state='untracked', path=sub_file, type='file')

    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)

    # look at the last change, only one file was added
    res = ds.diff(revision=head_range)
    assert_result_count(res, 1)
    _expect_one(res, state='added', path=parent_file, type='file')

    # now the exact same thing with recursion, must not be different from
    # the call above
    res = ds.diff(recursive=True, revision=head_range)
    assert_result_count(res, 1)
    # last change in parent
    _expect_one(res, state='added', path=parent_file, type='file')

    # one further back brings in the modified subdataset, and the added
    # file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    _expect_one(res, state='added', path=parent_file, type='file')
    _expect_one(res, state='added', path=sub_file, type='file')
    _expect_one(res, state='modified', path=sub.path, type='dataset')
def test_rerun_onto(path):
    """Check where ``rerun`` places its commits for different ``onto`` values.

    A dataset is created at `path` and two commands are recorded: one that
    always writes the same content ('static') and one whose output grows
    with every execution ('grows'). The test then reruns them with various
    ``onto``/``since``/``branch`` combinations and inspects the active
    branch, the resulting commit, and the revision ranges.
    """
    ds = Dataset(path).create()
    # Make sure we have more than one commit. The one commit case is checked
    # elsewhere.
    ds.repo.commit(msg="noop commit", options=["--allow-empty"])

    grow_file = op.join(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    # Read via a context manager so the file handle is closed right away
    # instead of being left to the garbage collector.
    with open(grow_file) as grows:
        grown = grows.read()
    eq_('xx\n', grown)

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    ok_(all(r["state"] == "clean" for r in ds.diff(fr="HEAD", to="static")))
    for revrange in ["..static", "static.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    ok_(all(r["state"] == "clean"
            for r in ds.diff(fr="master", to="from-base")))
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")
def test_rerun_onto(path):
    """Check where ``rerun`` places its commits for different ``onto`` values.

    A dataset is created at `path` and two commands are recorded: one that
    always writes the same content ('static') and one whose output grows
    with every execution ('grows'). The test then reruns them with various
    ``onto``/``since``/``branch`` combinations and inspects the active
    branch, the resulting commit, and the revision ranges.
    """
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    # Read via a context manager so the file handle is closed right away
    # instead of being left to the garbage collector.
    with open(grow_file) as grows:
        grown = grows.read()
    eq_('xx\n', grown)

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")
def test_diff_recursive(path):
    """Exercise ``diff`` with and without recursion into a subdataset.

    A parent dataset with one subdataset ('sub') is created under `path`.
    Results are checked right after creation, with untracked files present
    in both datasets, and after those files were saved in two commits.
    """
    parent = Dataset(path).create()
    child = parent.create('sub')
    onefile = op.join(parent.path, 'onefile')
    twofile = op.join(child.path, 'twofile')
    # keyword sets shared by several of the checks below
    onefile_added = dict(
        action='diff', state='added', path=onefile, type='file')
    sub_modified = dict(
        action='diff', state='modified', path=child.path, type='dataset')

    # the most recent commit registered the subdataset
    changes = parent.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(
        changes, 1,
        action='diff', state='added', path=child.path, type='dataset')

    # with recursion the subdataset's own diff (from scratch) is included
    changes = parent.diff(recursive=True, fr='HEAD~1', to='HEAD')
    assert_status('ok', changes)
    ok_(len(changes) > 3)
    # spot-check a single entry
    assert_result_count(
        changes, 1,
        action='diff', state='added',
        path=op.join(child.path, '.datalad', 'config'))

    # drop one new file into the parent and one into the subdataset
    create_tree(
        parent.path,
        {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    changes = parent.diff(recursive=True, untracked='all')
    assert_result_count(_dirty_results(changes), 3)
    assert_result_count(
        changes, 1,
        action='diff', state='untracked', path=onefile, type='file')
    assert_result_count(changes, 1, **sub_modified)
    assert_result_count(
        changes, 1,
        action='diff', state='untracked', path=twofile, type='file')

    # saving in two steps on purpose; it keeps the checks below simple
    parent.save('sub', recursive=True)
    parent.save()
    assert_repo_status(parent.path)

    # the last commit only brought in the one file of the parent
    changes = parent.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(changes), 1)
    assert_result_count(changes, 1, **onefile_added)

    # recursion must not alter that report
    changes = parent.diff(recursive=True, fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(changes), 1)
    assert_result_count(changes, 1, **onefile_added)

    # going one commit further back adds the modified subdataset and the
    # file that was added inside it
    changes = parent.diff(recursive=True, fr='HEAD~2', to='HEAD')
    assert_result_count(_dirty_results(changes), 3)
    assert_result_count(changes, 1, **onefile_added)
    assert_result_count(
        changes, 1,
        action='diff', state='added', path=twofile, type='file')
    assert_result_count(changes, 1, **sub_modified)
def test_diff_recursive(path):
    """Verify ``diff`` reports for a superdataset with one subdataset.

    Covers the state right after the subdataset was created, the state with
    untracked files in both datasets, and the history after both files were
    saved in two separate commits.
    """
    def expect_one(results, **match):
        # exactly one 'diff' result has to match the given properties
        assert_result_count(results, 1, action='diff', **match)

    superds = Dataset(path).create()
    subds = superds.create('sub')
    onefile = op.join(superds.path, 'onefile')
    twofile = op.join(subds.path, 'twofile')

    # the latest commit is the one that added the subdataset
    res = superds.diff(fr='HEAD~1', to='HEAD')
    expect_one(res, state='added', path=subds.path, type='dataset')

    # recursively, the whole subdataset is reported from scratch as well
    res = superds.diff(recursive=True, fr='HEAD~1', to='HEAD')
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    expect_one(
        res, state='added', path=op.join(subds.path, '.datalad', 'config'))

    # place a new file in the superdataset and another in the subdataset
    new_content = {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}}
    create_tree(superds.path, new_content)
    res = superds.diff(recursive=True, untracked='all')
    assert_result_count(_dirty_results(res), 3)
    expect_one(res, state='untracked', path=onefile, type='file')
    expect_one(res, state='modified', path=subds.path, type='dataset')
    expect_one(res, state='untracked', path=twofile, type='file')

    # intentional save in two steps to make check below easier
    superds.save('sub', recursive=True)
    superds.save()
    assert_repo_status(superds.path)

    # the last change added a single file; asking recursively must give
    # the very same answer as the plain query
    for extra in ({}, {'recursive': True}):
        res = superds.diff(fr='HEAD~1', to='HEAD', **extra)
        assert_result_count(_dirty_results(res), 1)
        expect_one(res, state='added', path=onefile, type='file')

    # one further back brings in the modified subdataset, and the added
    # file within it
    res = superds.diff(recursive=True, fr='HEAD~2', to='HEAD')
    assert_result_count(_dirty_results(res), 3)
    expect_one(res, state='added', path=onefile, type='file')
    expect_one(res, state='added', path=twofile, type='file')
    expect_one(res, state='modified', path=subds.path, type='dataset')
def test_diff(path, norepo):
    """Walk ``diff`` through added, modified, staged and untracked content.

    `path` is where the test dataset gets created; `norepo` is the working
    directory for the initial call that must be reported as 'impossible'
    (presumably a directory without a dataset -- confirm against the
    decorators supplying these arguments).
    """
    with chpwd(norepo):
        assert_status('impossible', diff(on_failure='ignore'))

    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    new_path = opj(ds.path, 'new')
    deep_path = opj(ds.path, 'deep')
    down_path = opj(ds.path, 'deep', 'down')
    down2_path = opj(ds.path, 'deep', 'down2')
    # keyword sets shared by several of the checks below
    new_added = dict(action='diff', path=new_path, state='added')
    new_modified = dict(action='diff', path=new_path, state='modified')

    # a nonsensical revision is reported, not raised
    bad_revision = ds.diff(revision='WTF', on_failure='ignore')
    assert_result_count(
        bad_revision, 1,
        status='impossible', message="fatal: bad revision 'WTF'")
    assert_result_count(ds.diff(), 0)
    # a clean dataset yields no diff
    assert_result_count(ds.diff(), 0)
    assert_result_count(ds.diff(revision='HEAD'), 0)
    # a bogus path does not change that
    assert_result_count(ds.diff(path='THIS', revision='HEAD'), 0)
    # against an earlier state there should be some diff; what exactly it
    # contains is not of interest at this point
    assert len(ds.diff(revision='HEAD~1')) > 0

    # introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.add('.', to_git=True)
    ok_clean_git(ds.path)
    found = ds.diff(revision='HEAD~1')
    assert_result_count(found, 1)
    assert_result_count(found, 1, **new_added)
    # the same result is available without going through the dataset
    # instance explicitly
    with chpwd(ds.path):
        assert_result_count(diff(revision='HEAD~1'), 1, **new_added)
    # no diff against HEAD
    assert_result_count(ds.diff(), 0)

    # modify the known file
    create_tree(ds.path, {'new': 'notempty'})
    for reference in (None, 'HEAD'):
        found = ds.diff(revision=reference)
        assert_result_count(found, 1)
        assert_result_count(found, 1, **new_modified)
    # a different path hides the modification ...
    assert_result_count(ds.diff('otherpath'), 0)
    # ... while the matching path reveals it
    assert_result_count(ds.diff('new'), 1, **new_modified)

    # stage the change
    ds.add('.', to_git=True, save=False)
    # no diff, because the modification is staged
    assert_result_count(ds.diff(), 0)
    # it can still be reached via `staged` ...
    assert_result_count(ds.diff(staged=True), 1, **new_modified)
    # ... or via an explicit revision
    assert_result_count(ds.diff(revision='HEAD'), 1, **new_modified)
    ds.save()
    ok_clean_git(ds.path)

    # untracked content
    create_tree(
        ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff reports the untracked content, but only by way of its
    # directory, because that parent directory is itself unknown
    found = ds.diff()
    assert_result_count(found, 1)
    assert_result_count(
        found, 1, state='untracked', type='directory', path=deep_path)
    # individual files can be reported too
    assert_result_count(
        ds.diff(report_untracked='all'), 2, state='untracked', type='file')
    # a non-matching path hides the result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # a perfect match, and anything underneath it, will do
    assert_result_count(
        ds.diff(path='deep'), 1,
        state='untracked', path=deep_path, type='directory')
    assert_result_count(
        ds.diff(path='deep'), 1, state='untracked', path=deep_path)

    # stage one of the two files in 'deep'
    ds.add(opj('deep', 'down2'), to_git=True, save=False)
    # without a reference the staged file is ignored and only the remaining
    # untracked file is reported
    assert_result_count(
        ds.diff(), 1, state='untracked', path=down_path, type='file')
    found = ds.diff(staged=True)
    assert_result_count(
        found, 1, state='untracked', path=down_path, type='file')
    assert_result_count(
        found, 1, state='added', path=down2_path, type='file')