def test_rotree(d):
    d2 = opj(d, 'd1', 'd2')  # deep nested directory
    f = opj(d2, 'f1')
    os.makedirs(d2)
    with open(f, 'w') as f_:
        f_.write("LOAD")
    with swallow_logs():
        ar = AnnexRepo(d2)
    rotree(d)
    # we shouldn't be able to delete anything UNLESS in "crippled" situation:
    # root, or filesystem is FAT etc
    # Theoretically annex should declare FS as crippled when ran as root, but
    # see http://git-annex.branchable.com/bugs/decides_that_FS_is_crippled_
    # under_cowbuilder___40__symlinks_supported_etc__41__/#comment-60c3cbe2710d6865fb9b7d6e247cd7aa
    # so explicit 'or'
    if not (ar.is_crippled_fs() or (os.getuid() == 0)):
        assert_raises(OSError, os.unlink, f)      # OK to use os.unlink
        assert_raises(OSError, unlink, f)         # and even with waiting and trying!
        assert_raises(OSError, shutil.rmtree, d)
        # but file should still be accessible
        with open(f) as f_:
            eq_(f_.read(), "LOAD")
    # make it RW
    rotree(d, False)
    unlink(f)
    shutil.rmtree(d)
def repo(self):
    """Get an instance of the version control system/repo for this dataset,
    or None if there is none yet.

    If creating an instance of GitRepo is guaranteed to be really cheap
    this could also serve as a test whether a repo is present.

    Returns
    -------
    GitRepo
    """
    if self._repo is None:
        with swallow_logs():
            try:
                self._repo = AnnexRepo(self._path, create=False, init=False)
            except (InvalidGitRepositoryError, NoSuchPathError, RuntimeError):
                try:
                    self._repo = GitRepo(self._path, create=False)
                except (InvalidGitRepositoryError, NoSuchPathError):
                    pass
    elif not isinstance(self._repo, AnnexRepo):
        # repo was initially set to be self._repo but might become AnnexRepo
        # at a later moment, so check if it didn't happen
        if 'git-annex' in self._repo.git_get_branches():
            # we acquired git-annex branch
            self._repo = AnnexRepo(self._repo.path, create=False)
    return self._repo
def test_publish_file_handle(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    #       Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat', output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get',
                                   annex_options=['test-annex.dat', '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])
def test_AnnexRepo_is_direct_mode(path):
    ar = AnnexRepo(path)
    dm = ar.is_direct_mode()
    if on_windows:
        assert_true(dm,
                    "AnnexRepo.is_direct_mode() returned false on windows.")
    else:
        assert_false(dm,
                     "AnnexRepo.is_direct_mode() returned true on non-windows")
def test_AnnexRepo_crippled_filesystem(src, dst):
    # TODO: This test is rudimentary, since the platform alone doesn't really
    # determine the filesystem. For now this should work for the buildbots.
    # Nevertheless: find a better way to test it.
    ar = AnnexRepo(dst, src)
    if on_windows:
        assert_true(ar.is_crippled_fs(),
                    "Detected non-crippled filesystem on windows.")
    else:
        assert_false(ar.is_crippled_fs(),
                     "Detected crippled filesystem on non-windows.")
def test_AnnexRepo_file_has_content(src, annex_path):
    ar = AnnexRepo(annex_path, src)
    testfiles = ["test-annex.dat", "test.dat"]
    assert_equal(ar.file_has_content(testfiles),
                 [("test-annex.dat", False), ("test.dat", False)])
    ar.annex_get("test-annex.dat")
    assert_equal(ar.file_has_content(testfiles),
                 [("test-annex.dat", True), ("test.dat", False)])
def ok_file_under_git(path, filename, annexed=False):
    repo = AnnexRepo(path)
    assert(filename in repo.get_indexed_files())  # file is known to Git
    try:
        repo.get_file_key(filename)
        in_annex = True
    except FileNotInAnnexError as e:
        in_annex = False
    assert(annexed == in_annex)
def test_update_fetch_all(src, remote_1, remote_2):
    rmt1 = AnnexRepo.clone(src, remote_1)
    rmt2 = AnnexRepo.clone(src, remote_2)

    ds = Dataset(src)
    ds.siblings('add', name="sibling_1", url=remote_1)
    ds.siblings('add', name="sibling_2", url=remote_2)

    # modify the remotes:
    with open(opj(remote_1, "first.txt"), "w") as f:
        f.write("some file load")
    rmt1.add("first.txt")
    rmt1.commit()
    # TODO: Modify an already present file!

    with open(opj(remote_2, "second.txt"), "w") as f:
        f.write("different file load")
    rmt2.add("second.txt", git=True)
    rmt2.commit(msg="Add file to git.")

    # Let's init some special remote which we couldn't really update/fetch
    if not os.environ.get('DATALAD_TESTS_DATALADREMOTE'):
        ds.repo.init_remote(
            'datalad',
            ['encryption=none', 'type=external', 'externaltype=datalad'])

    # fetch all remotes
    assert_result_count(
        ds.update(), 1, status='ok', type='dataset')

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/master"))
    assert_in("second.txt", ds.repo.get_files("sibling_2/master"))

    # no merge strategy for multiple remotes yet:
    # more clever now, there is a tracking branch that provides a remote
    #assert_raises(NotImplementedError, ds.update, merge=True)

    # merge a certain remote:
    assert_result_count(
        ds.update(sibling='sibling_1', merge=True),
        1, status='ok', type='dataset')
    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.get_files(ds.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    ds.repo.get_file_key("first.txt")  # raises if unknown
    eq_([False], ds.repo.file_has_content(["first.txt"]))
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'},
        {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'sub1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
def test_AnnexRepo_get(src, dst):
    ar = AnnexRepo(dst, src)
    assert_is_instance(ar, AnnexRepo, "AnnexRepo was not created.")
    testfile = 'test-annex.dat'
    testfile_abs = os.path.join(dst, testfile)
    assert_false(ar.file_has_content("test-annex.dat")[0][1])
    ar.annex_get(testfile)
    assert_true(ar.file_has_content("test-annex.dat")[0][1])

    f = open(testfile_abs, 'r')
    assert_equal(f.readlines(), ['123\n'],
                 "test-annex.dat's content doesn't match.")
def test_AnnexRepo_annex_add_to_git(src, dst):
    ar = AnnexRepo(dst, src)

    filename = 'file_to_git.dat'
    filename_abs = os.path.join(dst, filename)
    with open(filename_abs, 'w') as f:
        f.write("What to write?")
    assert_raises(IOError, ar.get_file_key, filename)
    ar.annex_add_to_git(filename)
    assert_in(filename, ar.get_indexed_files())
def ok_clean_git_annex_proxy(path):
    """Helper to check, whether an annex in direct mode is clean
    """
    # TODO: May be let's make a method of AnnexRepo for this purpose
    ar = AnnexRepoNew(path)
    cwd = os.getcwd()
    os.chdir(path)
    try:
        out = ar.annex_proxy("git status")
    except CommandNotAvailableError as e:
        raise SkipTest
def test_publish_with_data(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    #       Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target", with_data=['test-annex.dat'])
    eq_(res, source)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(target.git_get_branch_commits("git-annex"))[1:],
        list(source.repo.git_get_branch_commits("git-annex"))[1:])

    # we need to compare target/master:
    target.git_checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])
def _handle_possible_annex_dataset(dataset, reckless):
    # in any case check whether we need to annex-init the installed thing:
    if knows_annex(dataset.path):
        # init annex when traces of a remote annex can be detected
        if reckless:
            lgr.debug(
                "Instruct annex to hardlink content in %s from local "
                "sources, if possible (reckless)", dataset.path)
            dataset.config.add(
                'annex.hardlink', 'true', where='local', reload=True)
        lgr.debug("Initializing annex repo at %s", dataset.path)
        repo = AnnexRepo(dataset.path, init=True)
        if reckless:
            repo._run_annex_command('untrust', annex_options=['here'])
def test_AnnexRepo_set_direct_mode(src, dst):
    ar = AnnexRepo(dst, src)
    ar.set_direct_mode(True)
    assert_true(ar.is_direct_mode(), "Switching to direct mode failed.")
    if ar.is_crippled_fs():
        assert_raises(CommandNotAvailableError, ar.set_direct_mode, False)
        assert_true(
            ar.is_direct_mode(),
            "Indirect mode on crippled fs detected. Shouldn't be possible.")
    else:
        ar.set_direct_mode(False)
        assert_false(ar.is_direct_mode(), "Switching to indirect mode failed.")
def test_AnnexRepo_get_file_key(src, annex_path):
    ar = AnnexRepo(annex_path, src)

    # test-annex.dat should return the correct key:
    assert_equal(
        ar.get_file_key("test-annex.dat"),
        'SHA256E-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b.dat')

    # test.dat is actually in git
    # should raise Exception; also test for polymorphism
    assert_raises(IOError, ar.get_file_key, "test.dat")
    assert_raises(FileNotInAnnexError, ar.get_file_key, "test.dat")
    assert_raises(FileInGitError, ar.get_file_key, "test.dat")

    # filenotpresent.wtf doesn't even exist
    assert_raises(IOError, ar.get_file_key, "filenotpresent.wtf")
def mk_push_target(ds, name, path, annex=True, bare=True):
    # life could be simple, but nothing is simple on windows
    #src.create_sibling(dst_path, name='target')
    if annex:
        if bare:
            target = GitRepo(path=path, bare=True, create=True)
            # cannot use call_annex()
            target.call_git(['annex', 'init'])
        else:
            target = AnnexRepo(path, init=True, create=True)
            if not target.is_managed_branch():
                # for managed branches we need more fireworks->below
                target.config.set(
                    'receive.denyCurrentBranch', 'updateInstead',
                    where='local')
    else:
        target = GitRepo(path=path, bare=bare, create=True)
    ds.siblings('add', name=name, url=path, result_renderer=None)
    if annex and not bare and target.is_managed_branch():
        # maximum complication
        # the target repo already has a commit that is unrelated
        # to the source repo, because it has built a reference
        # commit for the managed branch.
        # the only sane approach is to let git-annex establish a shared
        # history
        ds.repo.call_annex(['sync'])
        ds.repo.call_annex(['sync', '--cleanup'])
    return target
def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
def test_ls_repos(toppath):
    # smoke test pretty much
    GitRepo(toppath + '1', create=True)
    AnnexRepo(toppath + '2', create=True)
    repos = glob(toppath + '*')
    # now make that sibling directory from which we will ls later
    mkdir(toppath)

    def _test(*args_):
        #print args_
        for args in args_:
            for recursive in [False, True]:
                # in both cases shouldn't fail
                with swallow_outputs() as cmo:
                    ls(args, recursive=recursive)
                    assert_equal(len(cmo.out.rstrip().split('\n')), len(args))
                    assert_in('[annex]', cmo.out)
                    assert_in('[git]', cmo.out)
                    assert_in('master', cmo.out)
                    if "bogus" in args:
                        assert_in('unknown', cmo.out)

    _test(repos, repos + ["/some/bogus/file"])
    # check from within a sibling directory with relative paths
    with chpwd(toppath):
        _test([relpath(x, toppath) for x in repos])
def test_smoke_pipelines(d):
    # Just to verify that we can correctly establish the pipelines
    AnnexRepo(d, create=True)
    with chpwd(d):
        with swallow_logs():
            for p in [pipeline('bogus'), collection_pipeline()]:
                ok_(len(p) > 1)
def test_AnnexRepo_annex_add(src, annex_path):
    ar = AnnexRepo(annex_path, src)

    filename = 'file_to_annex.dat'
    filename_abs = os.path.join(annex_path, filename)
    f = open(filename_abs, 'w')
    f.write("What to write?")
    f.close()
    ar.annex_add(filename)
    if not ar.is_direct_mode():
        assert_true(os.path.islink(filename_abs),
                    "Annexed file is not a link.")
    else:
        assert_false(os.path.islink(filename_abs),
                     "Annexed file is link in direct mode.")
    key = ar.get_file_key(filename)
    assert_false(key == '')
def test_runnin_on_empty(path):
    # empty repo
    repo = AnnexRepo(path, create=True)
    # just wrap with a dataset
    ds = Dataset(path)
    # and run status ... should be good and do nothing
    eq_([], ds.status())
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(ds.pathobj / 'subm 1')
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from all attempted sources: %s', res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
def _wrap_cached_dataset(*arg, **kw):
    if DATALAD_TESTS_CACHE:
        # Note: We can't pass keys based on `paths` parameter to
        # get_cached_dataset yet, since translation to keys depends on a
        # worktree. We'll have the worktree of `version` only after cloning.
        ds = get_cached_dataset(url, version=version)
        clone_ds = Clone()(ds.pathobj, arg[-1])
    else:
        clone_ds = Clone()(url, arg[-1])
    if version:
        clone_ds.repo.checkout(version)
    if paths and AnnexRepo.is_valid_repo(clone_ds.path):
        # just assume ds is annex as well. Otherwise `Clone` wouldn't
        # work correctly - we don't need to test its implementation here
        if DATALAD_TESTS_CACHE:
            # cache is enabled; we need to make sure it has the desired
            # content, so clone_ds can get it from there. However, we got
            # `paths` and potentially a `version` they refer to. We can't
            # assume the same (or any) worktree in cache. Hence we need to
            # translate to keys.
            keys = clone_ds.repo.get_file_key(paths)
            ds.repo.get(keys, key=True)
            clone_ds.repo.fsck(remote='origin', fast=True)

        clone_ds.get(paths)
    return f(*(arg[:-1] + (clone_ds, )), **kw)
def test_unlock_raises(path, path2, path3):
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s', res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
def test_symlinked_dataset_properties(repo1, repo2, repo3, non_repo, symlink):
    ds = Dataset(repo1).create()

    # now, let ds be a symlink and change that symlink to point to different
    # things:
    ar2 = AnnexRepo(repo2)
    ar3 = AnnexRepo(repo3)
    assert_true(os.path.isabs(non_repo))

    os.symlink(repo1, symlink)
    ds_link = Dataset(symlink)
    assert_is(ds_link.repo, ds.repo)  # same Repo instance
    assert_is_not(ds_link, ds)  # but not the same Dataset instance
    assert_is(ds_link.config, ds.repo.config)
    assert_true(ds_link._cfg_bound)
    assert_is_not_none(ds_link.id)
    # same id, although different Dataset instance:
    assert_equal(ds_link.id, ds.id)

    os.unlink(symlink)
    os.symlink(repo2, symlink)

    assert_is(ds_link.repo, ar2)  # same Repo instance
    assert_is(ds_link.config, ar2.config)
    assert_true(ds_link._cfg_bound)
    # id is None again, since this repository is an annex but there was no
    # Dataset.create() called yet.
    assert_is_none(ds_link.id)

    os.unlink(symlink)
    os.symlink(repo3, symlink)

    assert_is(ds_link.repo, ar3)  # same Repo instance
    assert_is(ds_link.config, ar3.config)
    assert_true(ds_link._cfg_bound)
    # id is None again, since this repository is an annex but there was no
    # Dataset.create() called yet.
    assert_is_none(ds_link.id)

    os.unlink(symlink)
    os.symlink(non_repo, symlink)

    assert_is_none(ds_link.repo)
    assert_is_not(ds_link.config, ar3.config)
    assert_false(ds_link._cfg_bound)
    assert_is_none(ds_link.id)
def ok_clean_git_annex_proxy(path):
    """Helper to check, whether an annex in direct mode is clean
    """
    # TODO: May be let's make a method of AnnexRepo for this purpose
    ar = AnnexRepo(path)
    cwd = getpwd()
    chpwd(path)
    try:
        out = ar.annex_proxy("git status")
    except CommandNotAvailableError as e:
        raise SkipTest
    finally:
        chpwd(cwd)

    assert_in(
        "nothing to commit, working directory clean", out[0],
        "git-status output via proxy not plausible: %s" % out[0])
def test_smoke_pipelines(d):
    # Just to verify that we can correctly establish the pipelines
    AnnexRepo(d, create=True)
    with chpwd(d):
        with swallow_logs():
            for p in [superdataset_pipeline(NITRC_IR)]:
                print(p)
                ok_(len(p) > 1)
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
def put_file_under_git(path, filename=None, content=None, annexed=False):
    """Place file under git/annex and return used Repo
    """
    annex, file_repo_path, filename, path, repo = _prep_file_under_git(path, filename)
    if content is None:
        content = ""
    with open(opj(repo.path, file_repo_path), 'w') as f_:
        f_.write(content)

    if annexed:
        if not isinstance(repo, AnnexRepo):
            repo = AnnexRepo(repo.path)
        repo.add(file_repo_path, commit=True, _datalad_msg=True)
    else:
        repo.add(file_repo_path, git=True, _datalad_msg=True)
    ok_file_under_git(repo.path, file_repo_path, annexed)
    return repo
def test_update_fetch_all(src, remote_1, remote_2):
    rmt1 = AnnexRepo(remote_1, src)
    rmt2 = AnnexRepo(remote_2, src)

    ds = Dataset(src)
    ds.add_sibling(name="sibling_1", url=remote_1)
    ds.add_sibling(name="sibling_2", url=remote_2)

    # modify the remotes:
    with open(opj(remote_1, "first.txt"), "w") as f:
        f.write("some file load")
    rmt1.add_to_annex("first.txt")
    # TODO: Modify an already present file!

    with open(opj(remote_2, "second.txt"), "w") as f:
        f.write("different file load")
    rmt2.git_add("second.txt")
    rmt2.git_commit("Add file to git.")

    # fetch all remotes
    ds.update(fetch_all=True)

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.git_get_files("sibling_1/master"))
    assert_in("second.txt", ds.repo.git_get_files("sibling_2/master"))

    # no merge strategy for multiple remotes yet:
    assert_raises(NotImplementedError, ds.update, merge=True, fetch_all=True)

    # merge a certain remote:
    ds.update(name="sibling_1", merge=True)
    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    ds.repo.get_file_key("first.txt")  # raises if unknown
    eq_([False], ds.repo.file_has_content(["first.txt"]))
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert(knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
def test_get_contentlocation(tdir):
    repo = AnnexRepo(tdir, create=True, init=True)
    repo.add('file.dat')
    repo.commit('added file.dat')

    key = repo.get_file_key('file.dat')
    cr = AnnexCustomRemote(tdir)
    key_path = cr.get_contentlocation(key, absolute=False)
    assert not isabs(key_path)
    key_path_abs = cr.get_contentlocation(key, absolute=True)
    assert isabs(key_path_abs)
    assert cr._contentlocations == {key: key_path}
    repo.drop('file.dat', options=['--force'])
    assert not cr.get_contentlocation(key, absolute=True)
def test_update_fetch_all(src, remote_1, remote_2):
    rmt1 = AnnexRepo.clone(src, remote_1)
    rmt2 = AnnexRepo.clone(src, remote_2)

    ds = Dataset(src)
    ds.add_sibling(name="sibling_1", url=remote_1)
    ds.add_sibling(name="sibling_2", url=remote_2)

    # modify the remotes:
    with open(opj(remote_1, "first.txt"), "w") as f:
        f.write("some file load")
    rmt1.add("first.txt", commit=True)
    # TODO: Modify an already present file!

    with open(opj(remote_2, "second.txt"), "w") as f:
        f.write("different file load")
    rmt2.add("second.txt", git=True, commit=True, msg="Add file to git.")

    # fetch all remotes
    ds.update(fetch_all=True)

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/master"))
    assert_in("second.txt", ds.repo.get_files("sibling_2/master"))

    # no merge strategy for multiple remotes yet:
    assert_raises(NotImplementedError, ds.update, merge=True, fetch_all=True)

    # merge a certain remote:
    ds.update(name="sibling_1", merge=True)
    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.get_files(ds.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    ds.repo.get_file_key("first.txt")  # raises if unknown
    eq_([False], ds.repo.file_has_content(["first.txt"]))
def test_AnnexRepo_get_file_key(src, annex_path):
    ar = AnnexRepo(annex_path, src)

    # test-annex.dat should return the correct key:
    assert_equal(
        ar.get_file_key("test-annex.dat"),
        'SHA256E-s4--181210f8f9c779c26da1d9b2075bde0127302ee0e3fca38c9a83f5b1dd8e5d3b.dat'
    )

    # test.dat is actually in git
    # should raise Exception; also test for polymorphism
    assert_raises(IOError, ar.get_file_key, "test.dat")
    assert_raises(FileNotInAnnexError, ar.get_file_key, "test.dat")
    assert_raises(FileInGitError, ar.get_file_key, "test.dat")

    # filenotpresent.wtf doesn't even exist
    assert_raises(IOError, ar.get_file_key, "filenotpresent.wtf")
def test_get_dataset_root(path):
    eq_(get_dataset_root('/nonexistent'), None)
    with chpwd(path):
        repo = AnnexRepo(os.curdir, create=True)
        subdir = opj('some', 'deep')
        fname = opj(subdir, 'dummy')
        os.makedirs(subdir)
        with open(fname, 'w') as f:
            f.write('some')
        repo.add(fname)
        # we can find this repo
        eq_(get_dataset_root(os.curdir), os.curdir)
        # and we get the type of path that we fed in
        eq_(get_dataset_root(abspath(os.curdir)), abspath(os.curdir))
        # subdirs are no issue
        eq_(get_dataset_root(subdir), os.curdir)
        # non-dir paths are no issue
        eq_(get_dataset_root(fname), os.curdir)
def test_knows_annex(here, there):
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo
    GitRepo(path=here, create=True)
    assert_false(knows_annex(here))
    AnnexRepo(path=here, create=True)
    assert_true(knows_annex(here))
    GitRepo.clone(path=there, url=here, create=True)
    assert_true(knows_annex(there))
def _test_crawl_init_error(args, template, template_func, target_value, tmpdir):
    ar = AnnexRepo(tmpdir)
    with chpwd(tmpdir):
        assert_raises(target_value, crawl_init,
                      args=args, template=template, template_func=template_func)
def test_get_contentlocation(tdir=None):
    repo = AnnexRepo(tdir, create=True, init=True)
    repo.add('file.dat')
    repo.commit('added file.dat')
    # TODO: contentlocation would come with eval_availability=True
    key = repo.get_file_annexinfo('file.dat')['key']

    cr = ArchiveAnnexCustomRemote(None, path=tdir)
    key_path = cr.get_contentlocation(key, absolute=False)
    assert not isabs(key_path)
    key_path_abs = cr.get_contentlocation(key, absolute=True)
    assert isabs(key_path_abs)
    assert cr._contentlocations == {key: key_path}
    repo.drop('file.dat', options=['--force'])
    assert not cr.get_contentlocation(key, absolute=True)
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".", commit=True)

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # since now we base incoming on master -- and there was nothing custom
    # in master after incoming-processed, both branches should be the same
    eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # but that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    commits = {b: list(repo.get_branch_commits(b)) for b in branches}
    # all commits out there -- init ds + init crawler + 1*(incoming, processed)
    # The number of commits in master differs based on the create variant used
    # (the one DataLad's master makes only one commit).
    ncommits_master = len(commits["master"])
    assert_in(ncommits_master, [4, 5])
    # incoming branches from master but lacks one merge commit.
    eq_(len(commits['incoming']), ncommits_master - 1)
    # incoming-processed is on master.
    eq_(len(commits['incoming-processed']), ncommits_master)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii',
        './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)
def clone_url(url):
    # delay import of our code until needed for certain
    from ..cmd import Runner
    runner = Runner()
    tdir = tempfile.mkdtemp(**get_tempfile_kwargs({}, prefix='clone_url'))
    _ = runner(["git", "clone", url, tdir], expect_stderr=True)
    if GitRepo(tdir).is_with_annex():
        AnnexRepo(tdir, init=True)
    _TEMP_PATHS_CLONES.add(tdir)
    return tdir
def test_publish_simple(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    #       Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    commits = {b: list(repo.get_branch_commits(b)) for b in branches}
    eq_(len(commits['incoming']), 1)
    eq_(len(commits['incoming-processed']), 2)
    # all commits out there -- init ds + init crawler + 1*(incoming, processed, merge)
    eq_(len(commits['master']), 6)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii',
        './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    #       Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, dest="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.git_checkout("TMP", "-b")
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.git_checkout("TMP", "-b")
    sub1 = GitRepo(opj(src_path, 'sub1'), create=False)
    sub2 = GitRepo(opj(src_path, 'sub2'), create=False)
    sub1.git_remote_add("target", sub1_pub)
    sub2.git_remote_add("target", sub2_pub)

    # publish recursively
    res = publish(dataset=source, dest="target", recursive=True)

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, list)
    for item in res:
        assert_is_instance(item, Dataset)
    eq_(res[0].path, src_path)
    eq_(res[1].path, sub1.path)
    eq_(res[2].path, sub2.path)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
    eq_(list(sub1_target.git_get_branch_commits("master")),
        list(sub1.git_get_branch_commits("master")))
    eq_(list(sub1_target.git_get_branch_commits("git-annex")),
        list(sub1.git_get_branch_commits("git-annex")))
    eq_(list(sub2_target.git_get_branch_commits("master")),
        list(sub2.git_get_branch_commits("master")))
    eq_(list(sub2_target.git_get_branch_commits("git-annex")),
        list(sub2.git_get_branch_commits("git-annex")))
def _test_annex_version_comparison(v, cmp_):
    class _runner(object):
        def run(self, cmd, *args, **kwargs):
            return dict(stdout=v, stderr="")

    ev = ExternalVersions()
    with set_annex_version(None), \
            patch('datalad.support.external_versions._runner', _runner()), \
            patch('datalad.support.annexrepo.external_versions',
                  ExternalVersions()):
        ev['cmd:annex'] < AnnexRepo.GIT_ANNEX_MIN_VERSION
        if cmp_ in (1, 0):
            AnnexRepo._check_git_annex_version()
            if cmp_ == 0:
                assert_equal(AnnexRepo.git_annex_version, v)
        elif cmp_ == -1:
            with assert_raises(OutdatedExternalDependency):
                ev.check('cmd:annex', min_version=AnnexRepo.GIT_ANNEX_MIN_VERSION)
            with assert_raises(OutdatedExternalDependency):
                AnnexRepo._check_git_annex_version()
def test_AnnexRepo_instance_from_clone(src, dst):
    ar = AnnexRepo(dst, src)
    assert_is_instance(ar, AnnexRepo, "AnnexRepo was not created.")
    assert_true(os.path.exists(os.path.join(dst, '.git', 'annex')))

    # do it again should raise GitCommandError since git will notice there's
    # already a git-repo at that path and therefore can't clone to `dst`
    with swallow_logs() as cm:
        assert_raises(GitCommandError, AnnexRepo, dst, src)
        assert("already exists" in cm.out)
def test_bare(src=None, path=None):
    # create a proper datalad dataset with all bells and whistles
    ds = Dataset(src).create()
    dlconfig_sha = ds.repo.call_git(['rev-parse', 'HEAD:.datalad/config'])
    # can we handle a bare repo version of it?
    gr = AnnexRepo.clone(
        src, path, clone_options=['--bare', '-b', DEFAULT_BRANCH])
    # we had to specifically checkout the standard branch, because on crippled
    # FS, HEAD will point to an adjusted branch by default, and the test logic
    # below does not account for this case.
    # this should just make sure the bare repo has the expected setup,
    # but it should still be bare. Let's check that to be sure
    assert_true(gr.bare)

    # do we read the correct local config?
    assert_in(gr.pathobj / 'config', gr.config._stores['git']['files'])
    # do we pick up the default branch config too?
    assert_in('blob:HEAD:.datalad/config',
              gr.config._stores['branch']['files'])
    # and track its reload stamp via its file shasum
    assert_equal(
        dlconfig_sha,
        gr.config._stores['branch']['stats']['blob:HEAD:.datalad/config'])
    # check that we can pick up the dsid from the commit branch config
    assert_equal(ds.id, gr.config.get('datalad.dataset.id'))
    # and it is coming from the correct source
    assert_equal(
        ds.id, gr.config.get_from_source('branch', 'datalad.dataset.id'))
    assert_equal(
        None, gr.config.get_from_source('local', 'datalad.dataset.id'))

    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # not set something that wasn't there
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # to the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', scope='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1], (gr.pathobj / 'config').read_text())

    # update committed config and check update
    old_id = ds.id
    ds.config.set('datalad.dataset.id', 'surprise!', scope='branch')
    ds.save()
    # fetch into default branch (like `update`, but for bare-repos)
    gr.call_git(
        ['fetch', f'{DEFAULT_REMOTE}', f'{DEFAULT_BRANCH}:{DEFAULT_BRANCH}'])
    # without a reload, no state change, like with non-bare repos
    assert_equal(
        old_id, gr.config.get_from_source('branch', 'datalad.dataset.id'))
    # a non-forced reload() must be enough, because state change
    # detection kicks in
    gr.config.reload()
    assert_equal('surprise!', gr.config.get('datalad.dataset.id'))
def test_ls_noarg(toppath):
    # smoke test pretty much
    AnnexRepo(toppath, create=True)

    # this test is pointless for now and until ls() actually returns
    # something
    with swallow_outputs():
        ls_out = ls(toppath)
        with chpwd(toppath):
            assert_equal(ls_out, ls([]))
            assert_equal(ls_out, ls('.'))
def test_interactions(tdir):
    # Just a placeholder since constructor expects a repo
    repo = AnnexRepo(tdir, create=True, init=True)
    repo.add('file.dat')
    repo.commit('added file.dat')
    for scenario in BASE_INTERACTION_SCENARIOS + [
        [
            ('GETAVAILABILITY', 'AVAILABILITY %s' % DEFAULT_AVAILABILITY),
            ('GETCOST', 'COST %d' % DEFAULT_COST),
            ('TRANSFER RETRIEVE somekey somefile',
             re.compile('TRANSFER-FAILURE RETRIEVE somekey NotImplementedError().*')),
        ],
        [
            # by default we do not require any fancy init
            # no urls supported by default
            ('CLAIMURL http://example.com', 'CLAIMURL-FAILURE'),
            # we know that is just a single option, url, is expected so full
            # one would be passed
            ('CLAIMURL http://example.com roguearg', 'CLAIMURL-FAILURE'),
        ]
    ]:
        check_interaction_scenario(AnnexCustomRemote, tdir, scenario)
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = create(origin_path)
    target = AnnexRepo(target_path, create=True)
    target.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    origin.siblings('add', name='target', url=target_path)
    origin.publish(to='target')
    ok_clean_git(origin.path)
    ok_clean_git(target.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    ok_clean_git(origin.path)
    assert_not_equal(origin.repo.get_hexsha(), target.get_hexsha())
    # now push
    res = origin.publish(to='target')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=origin.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())
def test_is_installed(src, path):
    ds = Dataset(path)
    assert_false(ds.is_installed())

    # get a clone:
    AnnexRepo.clone(src, path)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    subds.create()
    # get the submodule
    # This would init so there is a .git file with symlink info, which is
    # as we agreed is more pain than gain, so let's use our install which would
    # do it right, after all we are checking 'is_installed' ;)
    # from datalad.cmd import Runner
    # Runner().run(['git', 'submodule', 'update', '--init', 'subm 1'], cwd=path)
    with chpwd(path):
        get('subm 1')
    ok_(subds.is_installed())
    # wipe it out
    rmtree(ds.path)
    assert_false(ds.is_installed())
def check_compress_file(ext, annex, path, name):
    archive = name + ext
    compress_files([_filename], archive, path=path)
    assert_true(exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest(exc_str(exc))
    _filepath = op.join(dir_extracted, _filename)

    import glob
    print(dir_extracted)
    print(glob.glob(dir_extracted + '/*'))
    ok_file_has_content(_filepath, 'content')