def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """Just a helper to carry out a few checks on RIs"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;) above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
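# A hedged usage illustration for the helper above; the concrete RI string and
# field values are made up for demonstration, not taken from the real test
# module:
#
#   _check_ri("http://host/path", URL,
#             scheme='http', hostname='host', path='/path')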
def test_reckless(src, top_path, sharedpath):
    ds = clone(src, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    sub = ds.clone(src, 'sub',
                   result_xfm='datasets', return_type='item-or-list')
    eq_(sub.config.get('datalad.clone.reckless', None), 'auto')
    eq_(sub.config.get('annex.hardlink', None), 'true')

    if ds.repo.is_managed_branch():
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    # the standard setup keeps the annex locks accessible to the user only
    nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
         & stat.S_IWGRP)
    # but we can set it up for group-shared access too
    sharedds = clone(
        src, sharedpath,
        reckless='shared-group',
        result_xfm='datasets',
        return_type='item-or-list')
    ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
        & stat.S_IWGRP)
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__
    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
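# The eq_/neq_/ok_/nok_ helpers used throughout are DataLad test utilities.
# A minimal sketch of their presumable semantics (the real definitions live
# elsewhere in the test support code):
#
#   def neq_(a, b):
#       assert a != b
#
#   def nok_(x):
#       ok_(not x)   # i.e. assert not x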
def test_copy_file(workdir, webdir, weburl):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')

    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', check=False)
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt',
         dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')

    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
def test_is_datalad_compat_ri():
    ok_(is_datalad_compat_ri('ssh://*****:*****@host/path'))
    ok_(is_datalad_compat_ri('http://example.com'))
    ok_(is_datalad_compat_ri('file://localhost/some'))
    ok_(is_datalad_compat_ri('///localhost/some'))
    nok_(is_datalad_compat_ri('relative'))
    nok_(is_datalad_compat_ri('.///localhost/some'))
    nok_(is_datalad_compat_ri(123))
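# A sketch of the semantics these assertions pin down -- an interpretation
# derived from the test cases above, not DataLad's actual implementation:
#
#   def is_datalad_compat_ri(ri):
#       # URLs (incl. implicit ssh logins) and ///-style DataLad RIs qualify;
#       # plain relative paths and non-string values do not
#       return isinstance(ri, (str, RI)) and \
#           (is_url(ri) or str(ri).startswith('///'))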
def test_path_startswith():
    ok_(path_startswith('/a/b', '/a'))
    ok_(path_startswith('/a/b', '/a/b'))
    ok_(path_startswith('/a/b', '/a/b/'))
    ok_(path_startswith('/a/b/', '/a/b'))
    ok_(path_startswith('/a/b', '/'))
    ok_(path_startswith('/aaa/b/c', '/aaa'))
    nok_(path_startswith('/aaa/b/c', '/aa'))
    nok_(path_startswith('/a/b', '/a/c'))
    nok_(path_startswith('/a/b/c', '/a/c'))
    # must not mix relative and abs
    assert_raises(ValueError, path_startswith, 'a/b', '/a')
    assert_raises(ValueError, path_startswith, '/a/b', 'a')
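# A runnable sketch of the contract exercised above, derived purely from these
# assertions; the name is hypothetical to avoid shadowing the real helper, and
# this is not DataLad's actual implementation:
def _path_startswith_sketch(path, prefix):
    import os.path as op
    # mixing relative and absolute paths is an error
    if op.isabs(path) != op.isabs(prefix):
        raise ValueError("must not mix relative and absolute paths")
    path, prefix = op.normpath(path), op.normpath(prefix)
    # a path "starts with" a prefix if it equals it, or extends it by whole
    # path components
    return path == prefix or path.startswith(prefix.rstrip(op.sep) + op.sep)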
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest now
    assert_in_results(
        dest.copy_file(
            [sub.pathobj / '.git' / 'config',
             dest.pathobj / 'some' / '.git'],
            on_failure='ignore'),
        status='impossible',
        action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in the current understanding it is indeed a url, but an 'ssh' one
    # (implicit=True), not just a useless scheme=weired hoping to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
def check_reckless(annex, src_path, top_path, sharedpath):
    # super with or without annex
    src = Dataset(src_path).create(annex=annex)
    # sub always with annex
    srcsub = src.create('sub')

    # and for the actual test
    ds = clone(src.path, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')

    is_crippled = srcsub.repo.is_managed_branch()

    if annex and not is_crippled:
        eq_(ds.config.get('annex.hardlink', None), 'true')

    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    if annex:
        eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)

    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    newsub = ds.clone(srcsub, 'newsub',
                      result_xfm='datasets', return_type='item-or-list')
    # and `get` the original subdataset
    origsub = ds.get('sub', result_xfm='datasets', return_type='item-or-list')
    for sds in (newsub, origsub):
        eq_(sds.config.get('datalad.clone.reckless', None), 'auto')
        if not is_crippled:
            eq_(sds.config.get('annex.hardlink', None), 'true')

    if is_crippled:
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    if annex:
        if ds.repo.git_annex_version < "8.20200908":
            # TODO: Drop when GIT_ANNEX_MIN_VERSION is at least 8.20200908.
            # the standard setup keeps the annex locks accessible to the user only
            nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
                 & stat.S_IWGRP)
        else:
            # umask might be such (e.g. 002) that group write permissions are
            # inherited, so for the next test we should check if that is the
            # case on some sample file
            dltmp_path = ds.pathobj / '.git' / "dltmp"
            dltmp_path.write_text('')
            default_grp_write_perms = dltmp_path.stat().st_mode & stat.S_IWGRP
            dltmp_path.unlink()
            # the standard setup keeps the annex locks following umask inheritance
            eq_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
                & stat.S_IWGRP,
                default_grp_write_perms)
        # but we can set it up for group-shared access too
        sharedds = clone(src, sharedpath,
                         reckless='shared-group',
                         result_xfm='datasets',
                         return_type='item-or-list')
        ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
            & stat.S_IWGRP)
def test_publish_depends(origin, src_path, target1_path, target2_path,
                         target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://datalad-test' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        path=source.path,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling('ssh://datalad-test' + target3_path, name='target3')
    assert_repo_status(src_path)

    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.save('probe1')
    assert_repo_status(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))
    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')
    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source, to="target", path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res),
        set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # we need to compare target/<default branch>:
    target.checkout(DEFAULT_BRANCH)
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
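# filter_fsck_error_msg() is referenced above but not defined in this excerpt.
# Per the preceding comment it presumably strips git-annex "note" statements
# from the fsck records so that both sides compare equal; a minimal sketch
# under that assumption (the record structure is assumed, not verified):
#
#   def filter_fsck_error_msg(fsck_results):
#       return [{k: v for k, v in r.items() if k != 'note'}
#               for r in fsck_results]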
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'),
    #          opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)
    eq_(list(sub1_target.get_branch_commits_(DEFAULT_BRANCH)),
        list(sub1.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(sub1, sub1_target)
    eq_(list(sub2_target.get_branch_commits_(DEFAULT_BRANCH)),
        list(sub2.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(sub2, sub2_target)

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits_("git-annex")),
         list(source.repo.get_branch_commits_("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    assert_git_annex_branch_published(source.repo, origin.repo)

    # test for publishing with --since.  By default, since there are no
    # changes, nothing is pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).save('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).save('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data
    ## transfer
    res_ = publish(
        dataset=source, to='target',
        recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                      'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='^', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within
    # "subm 1" since if `--since=''` worked correctly, nothing has changed
    # there and it should not have been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path,
                        type='dataset')

    # Don't fail when a string is passed as `dataset` and since="".
    assert_status("notneeded", publish(since='^', dataset=source.path))