Example #1
def test_url_base():
    # Basic checks
    assert_raises(ValueError,
                  URL,
                  "http://example.com",
                  hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl),
            'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
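The helpers used throughout these snippets (eq_, neq_, ok_, nok_, assert_raises, ...) are never imported in the excerpts. A minimal sketch of the imports this first example appears to need, assuming DataLad's usual module layout (the exact module paths are an assumption, not confirmed by the snippet itself):

import logging

# assumed locations of DataLad's URL class and test helpers
from datalad.support.network import URL
from datalad.tests.utils_pytest import (
    assert_in,
    assert_raises,
    eq_,
    neq_,
    nok_,
    ok_,
)
from datalad.utils import swallow_logs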
Example #2
def test_no_interaction_with_untracked_content(path=None):
    # extracted from what was a metadata test originally
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)
    subds.remove(op.join('.datalad', 'config'))
    nok_((subds.pathobj / '.datalad' / 'config').exists())
    # this will only work if `remove` didn't do anything stupid and
    # cause all content to be saved
    subds.create('subsub', force=True)
Example #3
def test_kill(path=None):
    # nested datasets with load
    ds = Dataset(path).create()
    (ds.pathobj / 'file.dat').write_text('load')
    ds.save("file.dat")
    subds = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    assert_repo_status(ds.path)

    # and we fail to remove for many reasons
    # - unpushed commits
    # - a subdataset present
    # - unique annex key
    res = ds.remove(on_failure='ignore')
    assert_result_count(res, 1, status='error', path=ds.path)
    eq_(ds.remove(reckless='availability', result_xfm='datasets'), [subds, ds])
    nok_(ds.pathobj.exists())
Example #4
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
Example #5
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        if isinstance(ri, str):
            eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;) the check above should fail first if something is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(
        str(ri),
        str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # This one does not have a path. TODO: either proxy path from its .RI or adjust
    # hierarchy of classes to make it more explicit
    if cls == GitTransportRI:
        return
    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
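_check_ri is a helper rather than a test of its own, so a call site may clarify its contract. A hypothetical invocation under the same assumptions as above (URL from datalad.support.network; the concrete field values are illustrative only):

# expect the string to parse into a URL carrying exactly these fields;
# no localpath is given, so accessing ri_.localpath must raise ValueError
_check_ri('http://host.example/some/path', URL,
          scheme='http', hostname='host.example', path='/some/path')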
Example #6
def test_is_datalad_compat_ri():
    ok_(is_datalad_compat_ri('ssh://*****:*****@host/path'))
    ok_(is_datalad_compat_ri('http://example.com'))
    ok_(is_datalad_compat_ri('file://localhost/some'))
    ok_(is_datalad_compat_ri('///localhost/some'))
    nok_(is_datalad_compat_ri('relative'))
    nok_(is_datalad_compat_ri('.///localhost/some'))
    nok_(is_datalad_compat_ri(123))
Example #7
def test_register_config():
    nok_(has_config('datalad.testdummies.invalid'))
    assert_raises(
        ValueError,
        register_config,
        'datalad.testdummies.invalid',
        title=None,
        dialog='yesno')
    nok_(has_config('datalad.testdummies.invalid'))

    cfgkey = 'datalad.testdummies.try1'
    nok_(has_config(cfgkey))
    register_config(
        cfgkey,
        'This is what happens, when you do not listen to mama!',
        default_fn=lambda: 5,
        description='Try on-access default "computation"',
        type=int,
        dialog='question',
        scope='global',
    )

    from datalad.interface.common_cfg import definitions
    assert_in(cfgkey, definitions)
    # same thing, other part of the API
    assert_in(cfgkey, definitions.keys())
    # and yet another
    assert_in(cfgkey, [k for k, v in definitions.items()])
    # one more still
    assert_in(cfgkey, [k for k in definitions])
    # more smoke testing, we must have at least this one
    ok_(len(definitions))

    df = definitions[cfgkey]
    # on access default computation
    eq_(df['default'], 5)

    # we could set any novel property
    df['novel'] = 'unexpected'
    eq_(df.get('novel'), 'unexpected')
    eq_(df.get('toonovel'), None)
    # smoke test str/repr
    assert_in('mama', str(df))
    assert_in('mama', repr(df))

    # internal data structure for UI was assembled
    assert_in('ui', df)
    # more smoke
    assert_in('ui', df.keys())
    assert_in('ui', [k for k in df])
    nkeys = len(df)
    df.update(funky='seven')
    eq_(len(df), nkeys + 1)
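For context, a key registered this way would typically be consumed through DataLad's config manager, whose obtain() falls back to the registered default. A sketch under that assumption, reusing the key registered above:

from datalad import cfg

# while the key is unset anywhere, obtain() should yield the
# on-access default computed by default_fn, i.e. 5
value = cfg.obtain('datalad.testdummies.try1')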
Example #8
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest does not
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
Example #9
def test_clean_subds_removal(path=None):
    ds = Dataset(path).create()
    subds1 = ds.create('one')
    subds2 = ds.create('two')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['one', 'two'])
    assert_repo_status(ds.path)
    # now kill one
    res = ds.remove('one', reckless='availability', result_xfm=None)
    # subds1 got uninstalled, and ds got the removal of subds1 saved
    assert_result_count(res,
                        1,
                        path=subds1.path,
                        action='uninstall',
                        status='ok')
    assert_result_count(res, 1, path=subds1.path, action='remove', status='ok')
    assert_result_count(res, 1, path=ds.path, action='save', status='ok')
    ok_(not subds1.is_installed())
    assert_repo_status(ds.path)
    # two must remain
    eq_(ds.subdatasets(result_xfm='relpaths'), ['two'])
    # one is gone
    nok_(subds1.pathobj.exists())
    # and now again, but this time remove something that is not installed
    ds.create('three')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ds.drop('two', what='all', reckless='availability')
    assert_repo_status(ds.path)
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    nok_(subds2.is_installed())
    # orderly empty mountpoint is maintained
    ok_(subds2.pathobj.exists())
    res = ds.remove('two', reckless='availability')
    assert_in_results(res, path=str(ds.pathobj / 'two'), action='remove')
    assert_repo_status(ds.path)
    # subds2 was already uninstalled, now ds got the removal of subds2 saved
    nok_(subds2.pathobj.exists())
    eq_(ds.subdatasets(result_xfm='relpaths'), ['three'])
Example #10
def test_remove(path=None):
    # see docstring for test data structure
    ds = get_deeply_nested_structure(path)
    gitfile = op.join("subdir", "git_file.txt")

    ok_((ds.pathobj / gitfile).exists())
    res = ds.remove(gitfile, drop='all')
    assert_result_count(res, 3)
    # git file needs no dropping
    assert_in_results(
        res,
        action='drop',
        path=str(ds.pathobj / gitfile),
        status='notneeded',
        type='file',
    )
    # removed from working tree
    assert_in_results(
        res,
        action='remove',
        path=str(ds.pathobj / gitfile),
        status='ok',
        type='file',
    )
    # saved removal in dataset
    assert_in_results(
        res,
        action='save',
        path=ds.path,
        type='dataset',
        status='ok',
    )
    nok_((ds.pathobj / gitfile).exists())

    # now the same for an annexed file
    annexedfile = op.join("subdir", "annexed_file.txt")
    # drop failure prevents removal
    res = ds.remove(annexedfile, drop='all', on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(res,
                      status='error',
                      action='drop',
                      path=str(ds.pathobj / annexedfile))
    ok_((ds.pathobj / annexedfile).exists())

    # now remove the file, but do not actually drop the underlying
    # key -- hence no availability loss (the default mode of operation)
    # remember the key for a later check
    key = ds.repo.get_file_annexinfo(annexedfile)['key']
    res = ds.remove(annexedfile,
                    drop='datasets',
                    message="custom msg",
                    on_failure='ignore')
    # removal and dataset save
    assert_result_count(res, 2)
    eq_(
        ds.repo.format_commit("%B",
                              ds.repo.get_corresponding_branch()).rstrip(),
        "custom msg")
    assert_in_results(res,
                      action='remove',
                      status='ok',
                      path=str(ds.pathobj / annexedfile))
    assert_not_in_results(res, action='drop')
    nok_((ds.pathobj / annexedfile).exists())
    res = ds.repo.call_annex_records(['whereis', '--key', key, '--json'])
    assert_in_results(res, key=key, success=True)

    # now remove entire directory
    res = ds.remove('subdir', on_failure='ignore')
    assert_in_results(res, status='impossible', state='untracked')
    ok_((ds.pathobj / 'subdir').exists())

    ds.save('subdir')
    res = ds.remove('subdir', on_failure='ignore')
    assert_in_results(res, status='ok', action='remove')
    assert_in_results(res, status='ok', action='save', type='dataset')
    nok_((ds.pathobj / 'subdir').exists())

    # now remove an entire subdataset
    # prep: make clean
    rmdspath = ds.pathobj / 'subds_modified' / 'subds_lvl1_modified'
    ds.save(rmdspath, recursive=True)
    res = ds.remove(rmdspath, on_failure='ignore')
    # unique dataset, with unique keys -- must fail
    assert_in_results(res,
                      status='error',
                      action='uninstall',
                      path=str(rmdspath))

    # go reckless
    assert_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified',
                       recursive=True,
                       result_xfm='paths',
                       result_renderer='disabled'))
    res = ds.remove(rmdspath, reckless='availability', on_failure='ignore')
    assert_status('ok', res)
    assert_in_results(res, action='uninstall', path=str(rmdspath))
    assert_in_results(res, action='remove', path=str(rmdspath))
    nok_(rmdspath.exists())
    # properly unlinked
    assert_not_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified',
                       recursive=True,
                       result_xfm='paths',
                       result_renderer='disabled'))

    # lastly, remove an uninstalled subdataset
    # we save all to be able to check whether removal was committed and
    # the ds is clean at the end
    ds.save()
    # uninstall, we don't care about the existing modifications here
    res = ds.drop('subds_modified',
                  what='all',
                  reckless='kill',
                  recursive=True)
    # even remove the empty mount-point, such that it is invisible on the
    # file system
    (ds.pathobj / 'subds_modified').rmdir()
    res = ds.remove('subds_modified', on_failure='ignore')
    assert_in_results(res,
                      action='remove',
                      path=str(ds.pathobj / 'subds_modified'))
    # removal was committed
    assert_repo_status(ds.path)

    # and really finally, removing top-level is just a drop
    res = ds.remove(reckless='kill')
    assert_in_results(res, action='uninstall', path=ds.path, status='ok')
    nok_(ds.is_installed())
Example #11
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in the current understanding it is indeed a URL, but an 'ssh' one with
    # implicit=True, not just a useless scheme=weird hoping to point to a netloc
    with swallow_logs():
        ok_(is_url('weird://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))