Example #1
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;) the checks above should fail first if something is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote URL, so accessing it should raise an exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
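
A hypothetical call site for this helper (none are shown in these excerpts); the field names follow what the assertions read back, and the URL class is the one from Example #3:

_check_ri('http://example.com', URL, scheme='http', hostname='example.com')
# no localpath given: a plain http URL is remote, so .localpath must raise
_check_ri('file:///tmp/data', URL, scheme='file', path='/tmp/data',
          localpath='/tmp/data')  # assumed: file:// RIs carry a local path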
Example #2
def test_reckless(src, top_path, sharedpath):
    ds = clone(src, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    sub = ds.clone(src, 'sub', result_xfm='datasets', return_type='item-or-list')
    eq_(sub.config.get('datalad.clone.reckless', None), 'auto')
    eq_(sub.config.get('annex.hardlink', None), 'true')

    if ds.repo.is_managed_branch():
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    # the standard setup keeps the annex locks accessible to the user only
    nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
         & stat.S_IWGRP)
    # but we can set it up for group-shared access too
    sharedds = clone(
        src, sharedpath,
        reckless='shared-group',
        result_xfm='datasets',
        return_type='item-or-list')
    ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
        & stat.S_IWGRP)
Example #3
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
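
The repr shown above implies that URL objects can be built from fields directly; a short sketch of that round trip, using only field names the test itself reads back:

url2 = URL(scheme='http', hostname='example.com')
eq_(str(url2), 'http://example.com')    # renders back to the original string
eq_(url2, URL('http://example.com'))    # and equals the parsed form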
Example #4
def test_copy_file(workdir, webdir, weburl):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', check=False)
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad in a position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
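
For orientation, the three path-spec forms this test exercises, condensed (a recap of the calls above, not additional API guarantees):

src1 = src_ds.pathobj / 'myfile1.txt'             # paths from the test above
src2 = src_ds.pathobj / 'subdir' / 'myfile2.txt'
dest_ds.copy_file(src1)                                     # single source, keeps its name
dest_ds.copy_file([src1, dest_ds.pathobj / 'renamed.txt'])  # source + destination pair
dest_ds.copy_file([src1, src2, dest_ds.pathobj])            # many sources + target directory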
Example #5
def check_reckless(annex, src_path, top_path, sharedpath):
    # super with or without annex
    src = Dataset(src_path).create(annex=annex)
    # sub always with annex
    srcsub = src.create('sub')

    # and for the actual test
    ds = clone(src.path, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')

    is_crippled = srcsub.repo.is_managed_branch()

    if annex and not is_crippled:
        eq_(ds.config.get('annex.hardlink', None), 'true')

    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    if annex:
        eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    newsub = ds.clone(srcsub, 'newsub', result_xfm='datasets', return_type='item-or-list')
    # and `get` the original subdataset
    origsub = ds.get('sub', result_xfm='datasets', return_type='item-or-list')
    for sds in (newsub, origsub):
        eq_(sds.config.get('datalad.clone.reckless', None), 'auto')
        if not is_crippled:
            eq_(sds.config.get('annex.hardlink', None), 'true')

    if is_crippled:
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    if annex:
        # the standard setup keeps the annex locks accessible to the user only
        nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
             & stat.S_IWGRP)
        # but we can set it up for group-shared access too
        sharedds = clone(
            src, sharedpath,
            reckless='shared-group',
            result_xfm='datasets',
            return_type='item-or-list')
        ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
            & stat.S_IWGRP)
Example #6
def test_path_startswith():
    ok_(path_startswith('/a/b', '/a'))
    ok_(path_startswith('/a/b', '/'))
    ok_(path_startswith('/aaa/b/c', '/aaa'))
    nok_(path_startswith('/aaa/b/c', '/aa'))
    nok_(path_startswith('/a/b', '/a/c'))
    nok_(path_startswith('/a/b/c', '/a/c'))
Example #7
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the destination is a '.git'
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')
Example #8
def test_is_datalad_compat_ri():
    ok_(is_datalad_compat_ri('ssh://*****:*****@host/path'))
    ok_(is_datalad_compat_ri('http://example.com'))
    ok_(is_datalad_compat_ri('file://localhost/some'))
    ok_(is_datalad_compat_ri('///localhost/some'))
    nok_(is_datalad_compat_ri('relative'))
    nok_(is_datalad_compat_ri('.///localhost/some'))
    nok_(is_datalad_compat_ri(123))
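
A minimal sketch of the contract these assertions pin down, assuming the RI flavors seen elsewhere in these examples (URL, SSHRI, DataLadRI); an illustration of the expected behavior, not DataLad's actual implementation:

def is_datalad_compat_ri_sketch(ri):
    if not isinstance(ri, str):
        return False              # e.g. 123 above
    try:
        parsed = RI(ri)
    except ValueError:
        return False
    # ssh/http/file URLs and '///path' DataLad RIs qualify;
    # 'relative' parses to a plain path RI and is rejected
    return isinstance(parsed, (URL, SSHRI, DataLadRI))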
Example #9
def test_path_startswith():
    ok_(path_startswith('/a/b', '/a'))
    ok_(path_startswith('/a/b', '/a/b'))
    ok_(path_startswith('/a/b', '/a/b/'))
    ok_(path_startswith('/a/b/', '/a/b'))
    ok_(path_startswith('/a/b', '/'))
    ok_(path_startswith('/aaa/b/c', '/aaa'))
    nok_(path_startswith('/aaa/b/c', '/aa'))
    nok_(path_startswith('/a/b', '/a/c'))
    nok_(path_startswith('/a/b/c', '/a/c'))
    # must not mix relative and abs
    assert_raises(ValueError, path_startswith, 'a/b', '/a')
    assert_raises(ValueError, path_startswith, '/a/b', 'a')
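
A compact reference sketch consistent with every assertion above (whole-component prefix matching, trailing slashes ignored, mixing relative and absolute rejected); illustrative only, not DataLad's actual code:

import os.path as op

def path_startswith_sketch(path, prefix):
    if op.isabs(path) != op.isabs(prefix):
        raise ValueError('must not mix relative and absolute paths')
    path = path.rstrip('/') or '/'
    prefix = prefix.rstrip('/') or '/'
    # whole components only: '/aa' must not match '/aaa/b/c';
    # the inner rstrip keeps a bare '/' prefix working
    return path == prefix or path.startswith(prefix.rstrip('/') + '/')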
Example #10
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the destination is a '.git'
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
Example #11
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in the current understanding it is indeed a URL, but an implicit 'ssh' one,
    # not just a useless scheme='weired' hoping to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow implicit ssh URLs ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
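
A sketch of the str-or-RI acceptance the last two assertions show (SSHRI as the implicit-ssh flavor is an assumption here, not confirmed by the excerpt):

def is_url_sketch(ri):
    if not isinstance(ri, (str, RI)):
        return False              # e.g. 123 above
    try:
        ri_ = ri if isinstance(ri, RI) else RI(ri)
    except ValueError:
        return False
    # URLs proper plus implicit ssh ('like@sshlogin', 'weired://');
    # plain relative/absolute paths are not URLs
    return isinstance(ri_, (URL, SSHRI))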
Example #12
def check_reckless(annex, src_path, top_path, sharedpath):
    # super with or without annex
    src = Dataset(src_path).create(annex=annex)
    # sub always with annex
    srcsub = src.create('sub')

    # and for the actual test
    ds = clone(src.path,
               top_path,
               reckless=True,
               result_xfm='datasets',
               return_type='item-or-list')

    is_crippled = srcsub.repo.is_managed_branch()

    if annex and not is_crippled:
        eq_(ds.config.get('annex.hardlink', None), 'true')

    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    if annex:
        eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    newsub = ds.clone(srcsub,
                      'newsub',
                      result_xfm='datasets',
                      return_type='item-or-list')
    # and `get` the original subdataset
    origsub = ds.get('sub', result_xfm='datasets', return_type='item-or-list')
    for sds in (newsub, origsub):
        eq_(sds.config.get('datalad.clone.reckless', None), 'auto')
        if not is_crippled:
            eq_(sds.config.get('annex.hardlink', None), 'true')

    if is_crippled:
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    if annex:
        if ds.repo.git_annex_version < "8.20200908":
            # TODO: Drop when GIT_ANNEX_MIN_VERSION is at least 8.20200908.

            # the standard setup keeps the annex locks accessible to the user only
            nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
                 & stat.S_IWGRP)
        else:
            # umask might be such (e.g. 002) that group write permissions are inherited, so
            # for the next test we should check if that is the case on some sample file
            dltmp_path = ds.pathobj / '.git' / "dltmp"
            dltmp_path.write_text('')
            default_grp_write_perms = dltmp_path.stat().st_mode & stat.S_IWGRP
            dltmp_path.unlink()
            # the standard setup keeps the annex locks following umask inheritance
            eq_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
                 & stat.S_IWGRP, default_grp_write_perms)

        # but we can set it up for group-shared access too
        sharedds = clone(src,
                         sharedpath,
                         reckless='shared-group',
                         result_xfm='datasets',
                         return_type='item-or-list')
        ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
            & stat.S_IWGRP)
Example #13
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source,
                  to="target",
                  path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because the local annex records information about the
    # file now being available in that remote, while the remote one does it
    # via a call in the hook, I guess.  So they both get the same information
    # but in two different commits.  I do not observe such automagic updating
    # of git-annex on the remote in older clones,
    # which do not have a post-receive hook on the remote side
    eq_(
        list(target.get_branch_commits_("git-annex"))[1:],
        list(source.repo.get_branch_commits_("git-annex"))[1:])

    # we need to compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path,
                        source=dst_path,
                        result_xfm='datasets',
                        return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
Example #14
def test_publish_depends(origin, src_path, target1_path, target2_path,
                         target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling('ssh://datalad-test' + target1_path,
                          annex_wanted='standard',
                          annex_group='backup',
                          name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res,
        1,
        path=source.path,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling('ssh://datalad-test' + target3_path, name='target3')
    assert_repo_status(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.save('probe1')
    assert_repo_status(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
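
The mechanism exercised here reduces to one config key per sibling, remote.<name>.datalad-publish-depends: publishing to a sibling first publishes to its dependencies. A toy sketch of that resolution order (names hypothetical, no cycle handling):

depends = {'target2': ['target1']}   # sibling -> its publish dependencies

def publish_order(sibling, depends):
    # yield siblings in the order a publish to `sibling` would reach them
    for dep in depends.get(sibling, ()):
        yield from publish_order(dep, depends)
    yield sibling

assert list(publish_order('target2', depends)) == ['target1', 'target2']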
Example #15
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source,
                  to="target",
                  path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # we need to compare target/<default branch>:
    target.checkout(DEFAULT_BRANCH)
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path,
                        source=dst_path,
                        result_xfm='datasets',
                        return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
Example #16
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source,
                  to="target",
                  recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(res,
                        2,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make that possible
    sub2_target.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)
    eq_(list(sub1_target.get_branch_commits_(DEFAULT_BRANCH)),
        list(sub1.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(sub1, sub1_target)
    eq_(list(sub2_target.get_branch_commits_(DEFAULT_BRANCH)),
        list(sub2.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(sub2, sub2_target)

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits_("git-annex")),
         list(source.repo.get_branch_commits_("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    assert_git_annex_branch_published(source.repo, origin.repo)

    # test publishing with --since.  By default, since there are no changes, nothing is pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because the original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).save('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).save('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and the change not saved in the
    # super-dataset, we would have made the remote dataset somewhat
    # inconsistent if we had the entire hierarchy.
    # But here, since the target datasets are independent -- it is ok

    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(dataset=source,
                   to='target',
                   recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                      'target',
                      where='local')
    with chpwd(source.path):
        res_ = publish(since='^', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within "subm 1"
    # since, if `--since='^'` worked correctly, nothing has changed there and it
    # should not have been touched at all
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')

    # Don't fail when a string is passed as `dataset` and since='^'.
    assert_status("notneeded", publish(since='^', dataset=source.path))
Example #17
def test_is_datalad_compat_ri():
    ok_(is_datalad_compat_ri('file://localhost/some'))
    ok_(is_datalad_compat_ri('///localhost/some'))
    nok_(is_datalad_compat_ri('relative'))
    nok_(is_datalad_compat_ri('.///localhost/some'))
    nok_(is_datalad_compat_ri(123))