def test_get_missing(path):
    repo = GitRepo(path, create=True)
    os.makedirs(op.join(path, 'deep'))
    with open(op.join(path, 'test1'), 'w') as f:
        f.write('some')
    with open(op.join(path, 'deep', 'test2'), 'w') as f:
        f.write('some more')
    # no files tracked yet, so nothing changed
    eq_(repo.get_changed_files(), [])
    repo.add('.')
    # still no differences between worktree and staged
    eq_(repo.get_changed_files(), [])
    eq_(set(repo.get_changed_files(staged=True)),
        {'test1', op.join('deep', 'test2')})
    eq_(set(repo.get_changed_files(staged=True, diff_filter='AD')),
        {'test1', op.join('deep', 'test2')})
    eq_(repo.get_changed_files(staged=True, diff_filter='D'), [])
    repo.commit()
    eq_(repo.get_changed_files(), [])
    eq_(repo.get_changed_files(staged=True), [])
    ok_clean_git(path, annex=False)
    unlink(op.join(path, 'test1'))
    eq_(repo.get_missing_files(), ['test1'])
    rmtree(op.join(path, 'deep'))
    eq_(sorted(repo.get_missing_files()), [op.join('deep', 'test2'), 'test1'])
    # nothing is actually known to be deleted
    eq_(repo.get_deleted_files(), [])
    # do proper removal
    repo.remove(op.join(path, 'test1'))
    # no longer missing
    eq_(repo.get_missing_files(), [op.join('deep', 'test2')])
    # but deleted
    eq_(repo.get_deleted_files(), ['test1'])

def test_GitRepo_get_merge_base(src):
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert(repo.get_indexed_files())  # we did commit
    assert(repo.get_merge_base(branch1) is None)
    assert(repo.get_merge_base([branch2, branch1]) is None)

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert(repo.get_merge_base(['nonexistent', branch2]) is None)

def test_GitRepo_dirty(path):
    repo = GitRepo(path, create=True)
    ok_(not repo.dirty)

    # untracked file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(repo.dirty)
    # staged file
    repo.add('file1.txt')
    ok_(repo.dirty)
    # clean again
    repo.commit("file1.txt added")
    ok_(not repo.dirty)
    # modify to be the same
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(not repo.dirty)
    # modified file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('something else')
    ok_(repo.dirty)
    # clean again
    repo.add('file1.txt')
    repo.commit("file1.txt modified")
    ok_(not repo.dirty)

    # An empty directory doesn't count as dirty.
    os.mkdir(op.join(path, "empty"))
    ok_(not repo.dirty)
    # Neither does an empty directory that contains only another empty directory.
    os.mkdir(op.join(path, "empty", "empty-again"))
    ok_(not repo.dirty)

def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())

def test_GitRepo_gitignore(path):
    gr = GitRepo(path, create=True)
    sub = GitRepo(op.join(path, 'ignore-sub.me'))
    # we need to commit something, otherwise add_submodule
    # will already refuse the submodule for having no commit
    sub.add('a_file.txt')
    sub.commit()

    from ..exceptions import GitIgnoreError

    with open(op.join(path, '.gitignore'), "w") as f:
        f.write("*.me")

    with assert_raises(GitIgnoreError) as cme:
        gr.add('ignore.me')
    eq_(cme.exception.paths, ['ignore.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add_submodule(path='ignore-sub.me')
    eq_(cme.exception.paths, ['ignore-sub.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add(['ignore.me', 'dontigno.re',
                op.join('ignore-sub.me', 'a_file.txt')])
    eq_(set(cme.exception.paths), {'ignore.me', 'ignore-sub.me'})

    eq_(gr.get_gitattributes('.')['.'], {})  # nothing is recorded within .gitattributes

def test_optimized_cloning(path):
    # make test repo with one file and one commit
    originpath = op.join(path, 'origin')
    repo = GitRepo(originpath, create=True)
    with open(op.join(originpath, 'test'), 'w') as f:
        f.write('some')
    repo.add('test')
    repo.commit('init')
    ok_clean_git(originpath, annex=False)

    from glob import glob

    def _get_inodes(repo):
        return dict(
            [(os.path.join(*o.split(os.sep)[-2:]),
              os.stat(o).st_ino)
             for o in glob(os.path.join(repo.path,
                                        repo.get_git_dir(repo),
                                        'objects', '*', '*'))])

    origin_inodes = _get_inodes(repo)
    # now clone it in different ways and see what happens to the object storage
    from datalad.support.network import get_local_file_url
    clonepath = op.join(path, 'clone')
    for src in (originpath, get_local_file_url(originpath)):
        # deprecated
        assert_raises(DeprecatedError, GitRepo, url=src, path=clonepath)
        clone = GitRepo.clone(url=src, path=clonepath, create=True)
        clone_inodes = _get_inodes(clone)
        eq_(origin_inodes, clone_inodes, msg='with src={}'.format(src))
        rmtree(clonepath)

def test_GitRepo__get_files_history(path):
    gr = GitRepo(path, create=True)
    gr.add('d1')
    gr.commit("commit d1")
    gr.add(['d2', 'file'])
    gr.commit("commit d2")

    # commit containing files of d1
    d1_commit = next(gr._get_files_history(
        [op.join(path, 'd1', 'f1'), op.join(path, 'd1', 'f1')]))
    eq_(str(d1_commit.message), 'commit d1\n')

    # commit containing files of d2
    d2_commit_gen = gr._get_files_history(
        [op.join(path, 'd2', 'f1'), op.join(path, 'd2', 'f1')])
    eq_(str(next(d2_commit_gen).message), 'commit d2\n')
    # no more commits with files of d2
    assert_raises(StopIteration, next, d2_commit_gen)

    # union of commits containing passed objects
    commits_union = gr._get_files_history(
        [op.join(path, 'd1', 'f1'),
         op.join(path, 'd2', 'f1'),
         op.join(path, 'file')])
    eq_(str(next(commits_union).message), 'commit d2\n')
    eq_(str(next(commits_union).message), 'commit d1\n')
    assert_raises(StopIteration, next, commits_union)

    # file2 not committed, so shouldn't exist in commit history
    no_such_commits = gr._get_files_history([op.join(path, 'file2')])
    assert_raises(StopIteration, next, no_such_commits)

def test_GitRepo_add_submodule(source, path):
    top_repo = GitRepo(path, create=True)

    top_repo.add_submodule('sub', name='sub', url=source)
    top_repo.commit('submodule added')
    eq_([s.name for s in top_repo.get_submodules()], ['sub'])
    ok_clean_git(path)
    ok_clean_git(op.join(path, 'sub'))

def test_parent_on_unborn_branch(path):
    from datalad.support.gitrepo import GitRepo
    ds = Dataset(GitRepo(path, create=True).path)
    assert_false(ds.repo.get_hexsha())

    subrepo = GitRepo(opj(path, "sub"), create=True)
    subrepo.commit(msg="c", options=["--allow-empty"])

    ds.repo.add_submodule(path="sub")
    eq_(ds.subdatasets(result_xfm='relpaths'), ["sub"])

def test_GitRepo_remove(path):
    gr = GitRepo(path, create=True)
    gr.add('*')
    gr.commit("committing all the files")

    eq_(gr.remove('file'), ['file'])
    eq_(set(gr.remove('d', r=True, f=True)), {'d/f1', 'd/f2'})

    eq_(set(gr.remove('*', r=True, f=True)), {'file2', 'd2/f1', 'd2/f2'})

def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path,
                   result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_init_fail_under_known_subdir(path):
    repo = GitRepo(path, create=True)
    repo.add(op.join('subds', 'file_name'))
    # Should fail even if we do not commit but only add to index:
    with assert_raises(PathKnownToRepositoryError) as cme:
        GitRepo(op.join(path, 'subds'), create=True)
    assert_in("file_name", str(cme.exception))  # we provide a list of offenders

    # and after we commit - the same story
    repo.commit("added file")
    with assert_raises(PathKnownToRepositoryError) as cme:
        GitRepo(op.join(path, 'subds'), create=True)

    # But it would succeed if we disable the checks
    GitRepo(op.join(path, 'subds'), create=True, create_sanity_checks=False)

def test_ok_file_under_git_symlinks(path):
    # Test that ok_file_under_git works correctly under a symlinked path
    orepo = GitRepo(path)
    orepo.add('ingit')
    orepo.commit('msg')
    orepo.add('staged')
    lpath = path + "-symlink"  # will also be removed AFAIK by our tempfile handling
    os.symlink(path, lpath)
    ok_symlink(lpath)
    ok_file_under_git(op.join(path, 'ingit'))
    ok_file_under_git(op.join(lpath, 'ingit'))
    ok_file_under_git(op.join(lpath, 'staged'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'notingit'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'nonexisting'))

def test_GitRepo_ssh_push(repo_path, remote_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify local repo:
    repo.checkout("ssh-test", ['-b'])
    with open(op.join(repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    repo.add("ssh_testfile.dat")
    repo.commit("ssh_testfile.dat added.")

    # file is not known to the remote yet:
    assert_not_in("ssh_testfile.dat", remote_repo.get_indexed_files())

    # push changes:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    # test PushInfo object
    assert_in("ssh-remote/ssh-test",
              [commit.remote_ref.name for commit in pushed])

    # the connection is known to the SSH manager, since push() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # remote now knows the changes:
    assert_in("ssh-test", remote_repo.get_branches())
    assert_in("ssh_testfile.dat", remote_repo.get_files("ssh-test"))

    # amend to make it require "--force":
    repo.commit("amended", options=['--amend'])
    # push without --force should yield an error:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    assert_in("[rejected] (non-fast-forward)", pushed[0].summary)
    # now push using force:
    repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
    # correct commit message in remote:
    assert_in("amended",
              list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)

def test_get_commit_date(path):
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)

    eq_(date, gr.get_commit_date('master'))
    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))

def initiate_pipeline_config(template, template_func=None, template_kwargs=None,
                             path=curdir, commit=False):
    """Initialize a crawler pipeline configuration under `path`.

    Writes the template name, optional template function, and any template
    keyword arguments into the crawler config file, and optionally commits
    that file to the repository at `path`.
    """
    lgr.debug("Creating crawler configuration for template %s under %s",
              template, path)
    crawl_config_dir = opj(path, CRAWLER_META_DIR)
    if not exists(crawl_config_dir):
        lgr.log(2, "Creating %s", crawl_config_dir)
        makedirs(crawl_config_dir)

    crawl_config_repo_path = opj(CRAWLER_META_DIR, CRAWLER_META_CONFIG_FILENAME)
    crawl_config = opj(crawl_config_dir, CRAWLER_META_CONFIG_FILENAME)
    cfg_ = SafeConfigParserWithIncludes()
    cfg_.add_section(CRAWLER_PIPELINE_SECTION)
    cfg_.set(CRAWLER_PIPELINE_SECTION, 'template', template)
    if template_func:
        cfg_.set(CRAWLER_PIPELINE_SECTION, 'func', template_func)
    for k, v in (template_kwargs or {}).items():
        cfg_.set(CRAWLER_PIPELINE_SECTION, "_" + k, str(v))
    with open(crawl_config, 'w') as f:
        cfg_.write(f)

    if commit:
        repo = GitRepo(path)
        repo.add(crawl_config_repo_path)
        if repo.dirty:
            repo.commit("Initialized crawling configuration to use template %s"
                        % template,
                        _datalad_msg=True)
        else:
            lgr.debug("Repository is not dirty -- not committing")

    return crawl_config

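# A minimal usage sketch (not part of the module): the template name, kwargs,
# and path below are hypothetical, and the resulting file location depends on
# how CRAWLER_META_DIR and CRAWLER_META_CONFIG_FILENAME are defined in the
# crawler package.
#
# repo = GitRepo('/tmp/crawl-me', create=True)
# cfg_path = initiate_pipeline_config(
#     'openfmri',                               # hypothetical template name
#     template_kwargs={'dataset': 'ds000001'},  # stored as "_dataset" option
#     path=repo.path,
#     commit=True)                              # commit the new config file
# # cfg_path now points at the written crawler config within repo.path
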
def test_get_tags(path):
    from mock import patch

    gr = GitRepo(path, create=True)
    eq_(gr.get_tags(), [])
    eq_(gr.describe(), None)

    # Explicitly override the committer date because tests may set it to a
    # fixed value, but we want to check that the returned tags are sorted by
    # the committer date.
    with patch.dict("os.environ",
                    {"GIT_COMMITTER_DATE": "Thu, 07 Apr 2005 22:13:13 +0200"}):
        create_tree(gr.path, {'file': ""})
        gr.add('file')
        gr.commit(msg="msg")
        eq_(gr.get_tags(), [])
        eq_(gr.describe(), None)

        gr.tag("nonannotated")
        tags1 = [{'name': 'nonannotated', 'hexsha': gr.get_hexsha()}]
        eq_(gr.get_tags(), tags1)
        eq_(gr.describe(), None)
        eq_(gr.describe(tags=True), tags1[0]['name'])

    first_commit = gr.get_hexsha()

    with patch.dict("os.environ",
                    {"GIT_COMMITTER_DATE": "Fri, 08 Apr 2005 22:13:13 +0200"}):
        create_tree(gr.path, {'file': "123"})
        gr.add('file')
        gr.commit(msg="changed")

        gr.tag("annotated", message="annotation")
    tags2 = tags1 + [{'name': 'annotated', 'hexsha': gr.get_hexsha()}]
    eq_(gr.get_tags(), tags2)
    eq_(gr.describe(), tags2[1]['name'])

    # compare prev commit
    eq_(gr.describe(commitish=first_commit), None)
    eq_(gr.describe(commitish=first_commit, tags=True), tags1[0]['name'])

def test_fake_dates(path):
    gr = GitRepo(path, create=True, fake_dates=True)

    gr.add("foo")
    gr.commit("commit foo")

    seconds_initial = gr.config.obtain("datalad.fake-dates-start")

    # First commit is incremented by 1 second.
    eq_(seconds_initial + 1, gr.get_commit_date())

    # The second commit by 2.
    gr.add("bar")
    gr.commit("commit bar")
    eq_(seconds_initial + 2, gr.get_commit_date())

    # If we checkout another branch, its time is still based on the latest
    # timestamp in any local branch.
    gr.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as ofh:
        ofh.write("baz content")
    gr.add("baz")
    gr.commit("commit baz")
    eq_(gr.get_active_branch(), "other")
    eq_(seconds_initial + 3, gr.get_commit_date())

def test_GitRepo_git_get_branch_commits(src):
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    commits_default = list(repo.get_branch_commits())
    commits = list(repo.get_branch_commits('master'))
    eq_(commits, commits_default)
    eq_(len(commits), 1)

    commits_stop0 = list(repo.get_branch_commits(stop=commits[0].hexsha))
    eq_(commits_stop0, [])

    commits_hexsha = list(repo.get_branch_commits(value='hexsha'))
    commits_hexsha_left = list(
        repo.get_branch_commits(value='hexsha', limit='left-only'))
    eq_([commits[0].hexsha], commits_hexsha)
    # our unittest is rudimentary ;-)
    eq_(commits_hexsha_left, commits_hexsha)

    repo.precommit()  # to stop all the batched processes for swallow_outputs
    raise SkipTest("TODO: Was more of a smoke test -- improve testing")

def test_custom_runner_protocol(path):
    # Check that a runner with a non-default protocol gets wired up correctly.
    prot = ExecutionTimeProtocol()
    gr = GitRepo(path, runner=Runner(cwd=path, protocol=prot), create=True)
    ok_(len(prot) > 0)
    ok_(prot[0]['duration'] >= 0)

    def check(prev_len, prot, command):
        # Check that the list grew and has the expected command without
        # assuming that it gained _only_ one command.
        ok_(len(prot) > prev_len)
        assert_in(command, sum([p["command"] for p in prot[prev_len:]], []))

    prev_len = len(prot)
    gr.add("foo")
    check(prev_len, prot, "add")

    prev_len = len(prot)
    gr.commit("commit foo")
    check(prev_len, prot, "commit")

    ok_(all(p['duration'] >= 0 for p in prot))

def test_GitRepo_remote_update(path1, path2, path3):
    git1 = GitRepo(path1)
    git2 = GitRepo(path2)
    git3 = GitRepo(path3)

    git1.add_remote('git2', path2)
    git1.add_remote('git3', path3)

    # Setting up remote 'git2'
    with open(op.join(path2, 'masterfile'), 'w') as f:
        f.write("git2 in master")
    git2.add('masterfile')
    git2.commit("Add something to master.")
    git2.checkout('branch2', ['-b'])
    with open(op.join(path2, 'branch2file'), 'w') as f:
        f.write("git2 in branch2")
    git2.add('branch2file')
    git2.commit("Add something to branch2.")

    # Setting up remote 'git3'
    with open(op.join(path3, 'masterfile'), 'w') as f:
        f.write("git3 in master")
    git3.add('masterfile')
    git3.commit("Add something to master.")
    git3.checkout('branch3', ['-b'])
    with open(op.join(path3, 'branch3file'), 'w') as f:
        f.write("git3 in branch3")
    git3.add('branch3file')
    git3.commit("Add something to branch3.")

    git1.update_remote()

    # checkouts are 'tests' themselves, since they'll raise CommandError
    # if something went wrong
    git1.checkout('branch2')
    git1.checkout('branch3')

    branches1 = git1.get_branches()
    eq_({'branch2', 'branch3'}, set(branches1))

def test_GitRepo_commit(path):
    gr = GitRepo(path)
    filename = get_most_obscure_supported_name()
    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")

    gr.add(filename)
    gr.commit("Testing GitRepo.commit().")
    ok_clean_git(gr)
    eq_("Testing GitRepo.commit().{}".format(linesep),
        gr.repo.head.commit.message)

    with open(op.join(path, filename), 'w') as f:
        f.write("changed content")

    gr.add(filename)
    gr.commit("commit with options", options=to_options(dry_run=True))
    # wasn't actually committed:
    ok_(gr.dirty)

    # commit with empty message:
    gr.commit()
    ok_clean_git(gr)

    # nothing to commit doesn't raise by default:
    gr.commit()
    # but does with careless=False:
    assert_raises(CommandError, gr.commit, careless=False)

    # committing untracked file raises:
    with open(op.join(path, "untracked"), "w") as f:
        f.write("some")
    assert_raises(FileNotInRepositoryError, gr.commit, files="untracked")
    # not existing file as well:
    assert_raises(FileNotInRepositoryError, gr.commit, files="not-existing")

def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))

    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test for publishing with --since. By default since no changes, only current pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")
    # TODO: Doesn't work: https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", auto_add_changes=True)
    source.repo.commit("", options=['-a'])

    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})

def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)  # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))  # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))  # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))  # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')  # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:
        # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(topdir, json=state, all_=all_,
                               recursive=recursive)
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and deleted only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if its updated in its nodes sublist too. used by
                # web-ui json. regression test
                assert_equal(dsj['nodes'][0]['size']['total'],
                             dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes']
                         if item['name'] == ('subdsfile.txt' or 'subds')][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata already created
                # to verify sub-dataset metadata being picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [item for item in dsj['nodes']
                             if item['name'] == ('subdsfile.txt' or 'subds')][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

    assert_equal(topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE)

def test_get_hexsha_tag(path):
    gr = GitRepo(path, create=True)
    gr.commit(msg="msg", options=["--allow-empty"])
    gr.tag("atag", message="atag msg")
    # get_hexsha() dereferences a tag to a commit.
    eq_(gr.get_hexsha("atag"), gr.get_hexsha())

def test_fs_traverse(topdir):
    # setup temp directory tree for testing
    annex = AnnexRepo(topdir)
    AnnexRepo(opj(topdir, 'annexdir'), create=True)
    GitRepo(opj(topdir, 'gitdir'), create=True)
    subgit = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)
    subgit.add(".")
    subgit.commit(msg="c1")
    annex.add(opj(topdir, 'dir'))
    annex.commit()
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])

    # traverse file system in recursive and non-recursive modes
    for recursive in [True, False]:
        # test fs_traverse in display mode
        with swallow_logs(new_level=logging.INFO) as log, \
                swallow_outputs() as cmo:
            repo = AnnexRepo(topdir)
            fs = fs_traverse(topdir, repo,
                             recurse_directories=recursive, json='display')
            if recursive:
                # fs_traverse logs should contain all not ignored subdirectories
                for subdir in [opj(topdir, 'dir'),
                               opj(topdir, 'dir', 'subdir')]:
                    assert_in('Directory: ' + subdir, log.out)
                # fs_traverse stdout contains subdirectory
                assert_in(('file2.txt' and 'dir'), cmo.out)

            # extract info of the top-level child directory
            child = [item for item in fs['nodes'] if item['name'] == 'dir'][0]
            # size of dir type child in non-recursive modes should be 0 Bytes (default) as
            # dir type child's size currently has no metadata file for traverser to pick its size from
            # and would require a recursive traversal w/ write to child metadata file mode
            assert_equal(child['size']['total'],
                         {True: '6 Bytes', False: '0 Bytes'}[recursive])

        repo.precommit()  # to possibly stop batch process occupying the stdout

    for recursive in [True, False]:
        # run fs_traverse in write to json 'file' mode
        repo = AnnexRepo(topdir)
        fs = fs_traverse(topdir, repo,
                         recurse_directories=recursive, json='file')
        # fs_traverse should return a dictionary
        assert_equal(isinstance(fs, dict), True)
        # not including git and annex folders
        assert_equal([item for item in fs['nodes']
                      if ('gitdir' or 'annexdir') == item['name']], [])
        # extract info of the top-level child directory
        child = [item for item in fs['nodes'] if item['name'] == 'dir'][0]
        # verify node type
        assert_equal(child['type'], 'dir')
        # same node size on running fs_traversal in recursive followed by non-recursive mode
        # verifies child's metadata file being used to find its size
        # running in reverse order (non-recursive followed by recursive mode) will give (0, actual size)
        assert_equal(child['size']['total'], '6 Bytes')

        # verify subdirectory traversal if run in recursive mode
        # In current RF 'nodes' are stripped away during recursive traversal
        # for now... later we might reincarnate them "differently"
        # TODO!
        if False:  # recursive:
            # sub-dictionary should not include git and hidden directory info
            assert_equal([item for item in child['nodes']
                          if ('subgit' or '.fgit') == item['name']], [])
            # extract subdirectory dictionary, else fail
            subchild = [subitem for subitem in child["nodes"]
                        if subitem['name'] == 'subdir'][0]
            # extract info of file1.txt, else fail
            link = [subnode for subnode in subchild["nodes"]
                    if subnode['name'] == 'file1.txt'][0]
            # verify node's sizes and type
            assert_equal(link['size']['total'], '3 Bytes')
            assert_equal(link['size']['ondisk'], link['size']['total'])
            assert_equal(link['type'], 'link')
            # extract info of file2.txt, else fail
            brokenlink = [subnode for subnode in subchild["nodes"]
                          if subnode['name'] == 'file2.txt'][0]
            # verify node's sizes and type
            assert_equal(brokenlink['type'], 'link-broken')
            assert_equal(brokenlink['size']['ondisk'], '0 Bytes')
            assert_equal(brokenlink['size']['total'], '3 Bytes')
