def _test_save_all(path, repocls):
    """Save all changes in a convoluted test dataset and check the outcome.

    Parameters
    ----------
    path
        Location handed to `get_convoluted_situation()` to build the
        test dataset.
    repocls
        Repository class handed to `get_convoluted_situation()`.

    Returns
    -------
    The dataset returned by `get_convoluted_situation()`, after `save()`
    was called on its repository.
    """
    # use the repository flavor requested by the caller; previously GitRepo
    # was hard-coded here and `repocls` was silently ignored
    ds = get_convoluted_situation(path, repocls)
    orig_status = ds.repo.status(untracked='all')
    # TODO test the results when they are crafted
    res = ds.repo.save()
    # make sure we get a 'delete' result for each deleted file
    eq_(
        set(r['path'] for r in res if r['action'] == 'delete'),
        {k for k in orig_status if k.name == 'file_deleted'}
    )
    saved_status = ds.repo.status(untracked='all')
    # we still have an entry for everything that did not get deleted
    # intentionally
    eq_(
        len([f for f in orig_status if not f.match('*_deleted')]),
        len(saved_status)
    )
    # everything but subdataset entries that contain untracked content,
    # or modified subsubdatasets is now clean, a repo simply doesn't touch
    # other repos' private parts
    for f, props in saved_status.items():
        if props.get('state', None) != 'clean':
            assert f.match('subds_modified'), f
    return ds
def _test_save_all(path, repocls):
    """Save all changes in a convoluted test dataset and check the outcome.

    Parameters
    ----------
    path
        Location handed to `get_convoluted_situation()` to build the
        test dataset.
    repocls
        Repository class handed to `get_convoluted_situation()`.

    Returns
    -------
    The dataset returned by `get_convoluted_situation()`, after `save()`
    was called on its repository.
    """
    # use the repository flavor requested by the caller; previously GitRepo
    # was hard-coded here and `repocls` was silently ignored
    ds = get_convoluted_situation(path, repocls)
    orig_status = ds.repo.status(untracked='all')
    # TODO test the results when they are crafted
    res = ds.repo.save()
    # make sure we get a 'delete' result for each deleted file
    # (only the keys of the status mapping are needed here)
    eq_(
        set(r['path'] for r in res if r['action'] == 'delete'),
        {k for k in orig_status if k.name == 'file_deleted'}
    )
    saved_status = ds.repo.status(untracked='all')
    # we still have an entry for everything that did not get deleted
    # intentionally
    eq_(
        len([f for f in orig_status if not f.match('*_deleted')]),
        len(saved_status)
    )
    # everything but subdataset entries that contain untracked content,
    # or modified subsubdatasets is now clean, a repo simply doesn't touch
    # other repos' private parts
    for f, props in iteritems(saved_status):
        if props.get('state', None) != 'clean':
            assert f.match('subds_modified'), f
    return ds
def test_get_content_info(path):
    """Check content info, status and annex status reports of a repository.

    Covers `get_content_info()` on an empty repository and on the dataset
    built by `get_convoluted_situation()`, the three `untracked` reporting
    modes, the consistency of `status()` and `annexstatus()` records with
    the naming scheme of the test files, and the `eval_submodule_state`
    modes of `status()`.

    Parameters
    ----------
    path
        Location of the repository under test
        (presumably a temporary directory injected by a decorator that is
        not visible here -- TODO confirm).
    """
    repo = GitRepo(path)
    assert_equal(repo.get_content_info(), {})
    # an invalid reference causes an exception
    assert_raises(ValueError, repo.get_content_info, ref='HEAD')

    ds = get_convoluted_situation(path)
    repopath = ds.repo.pathobj

    assert_equal(ds.repo.pathobj, repopath)
    assert_equal(ds.pathobj, ut.Path(path))

    # verify general rules on fused info records that are incrementally
    # assembled: for git content info, amended with annex info on 'HEAD'
    # (to get the last committed stage and with it possibly vanished
    # content), and lastly annex info wrt to the present worktree, to
    # also get info on added/staged content
    # this fuses the info reported from
    # - git ls-files
    # - git annex findref HEAD
    # - git annex find --include '*'
    for f, r in ds.repo.annexstatus().items():
        if f.match('*_untracked'):
            assert r.get('gitshasum', None) is None
        if f.match('*_deleted'):
            # a deleted path must be gone entirely, not even a dangling
            # symlink may remain (the former `not f.is_symlink() is None`
            # was always true and tested nothing)
            assert not f.exists() and not f.is_symlink(), f
        if f.match('subds_*'):
            # the expected type depends on whether Git knows the path; the
            # former `assert (a == 'dataset' if cond else 'directory')`
            # asserted a non-empty string in the else-branch, i.e. nothing
            expected_type = \
                'dataset' if r.get('gitshasum', None) else 'directory'
            assert r['type'] == expected_type, f
        if f.match('file_*'):
            # which one exactly depends on many things
            assert_in(r['type'], ('file', 'symlink'))
        if f.match('file_ingit*'):
            assert r['type'] == 'file'
        elif '.datalad' not in f.parts and not f.match('.git*') and \
                r.get('gitshasum', None) and not f.match('subds*'):
            # this should be known to annex, one way or another
            # regardless of whether things add deleted or staged
            # or anything in between
            assert_in('key', r, f)
            assert_in('keyname', r, f)
            assert_in('backend', r, f)
            assert_in('bytesize', r, f)
            # no duplication with path
            assert_not_in('file', r, f)

    # query full untracked report
    res = ds.repo.get_content_info()
    assert_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_not_in(repopath.joinpath('dir_untracked'), res)
    # query for compact untracked report
    res = ds.repo.get_content_info(untracked='normal')
    assert_not_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_in(repopath.joinpath('dir_untracked'), res)
    # query no untracked report
    res = ds.repo.get_content_info(untracked='no')
    assert_not_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_not_in(repopath.joinpath('dir_untracked'), res)

    # states encoded in the names of the test files
    states = ('untracked', 'added', 'deleted', 'clean',
              'ingit_clean', 'dropped_clean',
              'modified', 'ingit_modified')
    # each item is tested in the root and in a subdirectory
    locations = ('', ut.PurePosixPath('subdir', ''))

    # git status integrity
    status = ds.repo.status()
    for t in ('subds', 'file'):
        for s in states:
            for loc in locations:
                # NOTE(review): parentheses spell out how Python always
                # evaluated this condition; any 'dropped' state is skipped
                # for files too. Possibly
                # `t == 'subds' and ('ingit' in s or 'dropped' in s)`
                # was intended -- confirm before changing.
                if (t == 'subds' and 'ingit' in s) or 'dropped' in s:
                    # invalid combination
                    continue
                if t == 'subds' and s == 'deleted':
                    # same as subds_unavailable -> clean
                    continue
                p = repopath.joinpath(loc, '{}_{}'.format(t, s))
                assert p.match('*_{}'.format(status[p]['state'])), p
                if t == 'subds':
                    assert_in(status[p]['type'], ('dataset', 'directory'), p)
                else:
                    assert_in(status[p]['type'], ('file', 'symlink'), p)

    # git annex status integrity
    annexstatus = ds.repo.annexstatus()
    for t in ('file', ):
        for s in states:
            for loc in locations:
                p = repopath.joinpath(loc, '{}_{}'.format(t, s))
                if s in ('untracked', 'ingit_clean', 'ingit_modified'):
                    # annex knows nothing about these things
                    assert_not_in('key', annexstatus[p])
                    continue
                assert_in('key', annexstatus[p])
                # dear future,
                # if the next one fails, git-annex might have changed the
                # nature of the path that are being reported by
                # `annex find --json`
                # when this was written `hashir*` was a native path, but
                # `file` was a POSIX path
                assert_equal(annexstatus[p]['has_content'], 'dropped' not in s)

    # check the different subds evaluation modes
    someds = Dataset(ds.pathobj / 'subds_modified' / 'someds')
    dirtyds_path = someds.pathobj / 'dirtyds'
    assert_not_in(
        'state',
        someds.repo.status(eval_submodule_state='no')[dirtyds_path]
    )
    assert_equal(
        'clean',
        someds.repo.status(
            eval_submodule_state='commit')[dirtyds_path]['state']
    )
    assert_equal(
        'modified',
        someds.repo.status(
            eval_submodule_state='full')[dirtyds_path]['state']
    )
def test_get_content_info(path):
    """Check content info, status and annex status reports of a repository.

    Covers `get_content_info()` on an empty repository and on the dataset
    built by `get_convoluted_situation()`, the three `untracked` reporting
    modes, the consistency of `status()` and `annexstatus()` records with
    the naming scheme of the test files, and the `eval_submodule_state`
    modes of `status()`.

    Parameters
    ----------
    path
        Location of the repository under test
        (presumably a temporary directory injected by a decorator that is
        not visible here -- TODO confirm).
    """
    repo = GitRepo(path)
    assert_equal(repo.get_content_info(), {})
    # an invalid reference causes an exception
    assert_raises(ValueError, repo.get_content_info, ref='HEAD')

    ds = get_convoluted_situation(path)
    repopath = ds.repo.pathobj

    assert_equal(ds.repo.pathobj, repopath)
    assert_equal(ds.pathobj, ut.Path(path))

    # verify general rules on fused info records that are incrementally
    # assembled: for git content info, amended with annex info on 'HEAD'
    # (to get the last committed stage and with it possibly vanished
    # content), and lastly annex info wrt to the present worktree, to
    # also get info on added/staged content
    # this fuses the info reported from
    # - git ls-files
    # - git annex findref HEAD
    # - git annex find --include '*'
    for f, r in ds.repo.annexstatus().items():
        if f.match('*_untracked'):
            assert r.get('gitshasum', None) is None
        if f.match('*_deleted'):
            # a deleted path must be gone entirely, not even a dangling
            # symlink may remain (the former `not f.is_symlink() is None`
            # was always true and tested nothing)
            assert not f.exists() and not f.is_symlink(), f
        if f.match('subds_*'):
            # the expected type depends on whether Git knows the path; the
            # former `assert(a == 'dataset' if cond else 'directory')`
            # asserted a non-empty string in the else-branch, i.e. nothing
            expected_type = \
                'dataset' if r.get('gitshasum', None) else 'directory'
            assert r['type'] == expected_type, f
        if f.match('file_*'):
            # which one exactly depends on many things
            assert_in(r['type'], ('file', 'symlink'))
        if f.match('file_ingit*'):
            assert r['type'] == 'file'
        elif '.datalad' not in f.parts and not f.match('.git*') and \
                r.get('gitshasum', None) and not f.match('subds*'):
            # this should be known to annex, one way or another
            # regardless of whether things add deleted or staged
            # or anything in between
            assert_in('key', r, f)
            assert_in('keyname', r, f)
            assert_in('backend', r, f)
            assert_in('bytesize', r, f)
            # no duplication with path
            assert_not_in('file', r, f)

    # query full untracked report
    res = ds.repo.get_content_info()
    assert_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_not_in(repopath.joinpath('dir_untracked'), res)
    # query for compact untracked report
    res = ds.repo.get_content_info(untracked='normal')
    assert_not_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_in(repopath.joinpath('dir_untracked'), res)
    # query no untracked report
    res = ds.repo.get_content_info(untracked='no')
    assert_not_in(repopath.joinpath('dir_untracked', 'file_untracked'), res)
    assert_not_in(repopath.joinpath('dir_untracked'), res)

    # states encoded in the names of the test files
    states = ('untracked', 'added', 'deleted', 'clean',
              'ingit_clean', 'dropped_clean',
              'modified', 'ingit_modified')
    # each item is tested in the root and in a subdirectory
    locations = ('', ut.PurePosixPath('subdir', ''))

    # git status integrity
    status = ds.repo.status()
    for t in ('subds', 'file'):
        for s in states:
            for loc in locations:
                # NOTE(review): parentheses spell out how Python always
                # evaluated this condition; any 'dropped' state is skipped
                # for files too. Possibly
                # `t == 'subds' and ('ingit' in s or 'dropped' in s)`
                # was intended -- confirm before changing.
                if (t == 'subds' and 'ingit' in s) or 'dropped' in s:
                    # invalid combination
                    continue
                if t == 'subds' and s == 'deleted':
                    # same as subds_unavailable -> clean
                    continue
                p = repopath.joinpath(loc, '{}_{}'.format(t, s))
                assert p.match('*_{}'.format(status[p]['state'])), p
                if t == 'subds':
                    assert_in(status[p]['type'], ('dataset', 'directory'), p)
                else:
                    assert_in(status[p]['type'], ('file', 'symlink'), p)

    # git annex status integrity
    annexstatus = ds.repo.annexstatus()
    for t in ('file',):
        for s in states:
            for loc in locations:
                p = repopath.joinpath(loc, '{}_{}'.format(t, s))
                if s in ('untracked', 'ingit_clean', 'ingit_modified'):
                    # annex knows nothing about these things
                    assert_not_in('key', annexstatus[p])
                    continue
                assert_in('key', annexstatus[p])
                # dear future,
                # if the next one fails, git-annex might have changed the
                # nature of the path that are being reported by
                # `annex find --json`
                # when this was written `hashir*` was a native path, but
                # `file` was a POSIX path
                assert_equal(annexstatus[p]['has_content'], 'dropped' not in s)

    # check the different subds evaluation modes
    someds = Dataset(ds.pathobj / 'subds_modified' / 'someds')
    dirtyds_path = someds.pathobj / 'dirtyds'
    assert_not_in(
        'state',
        someds.repo.status(eval_submodule_state='no')[dirtyds_path]
    )
    assert_equal(
        'clean',
        someds.repo.status(eval_submodule_state='commit')[dirtyds_path]['state']
    )
    assert_equal(
        'modified',
        someds.repo.status(eval_submodule_state='full')[dirtyds_path]['state']
    )