Example #1
def test_put(client):
    client, ds = client
    with client as c:
        assert c.get('/api/v1/auth').status_code == 200
    ok_clean_git(ds.path)

    testpath = 'subdir/dummy'
    file_content = '{"three": 3}'
    assert testpath not in c.get('/api/v1/file').get_json()['files']

    count = 0
    for kw, content in (
        ({}, file_content),
        ({
            'json': 'stream'
        }, file_content),
        ({
            'json': 'yes'
        }, file_content),
    ):
        targetpath = '{}_{}'.format(testpath, count)
        rq = c.put(
            '/api/v1/file/{}'.format(targetpath),
            data=json.dumps(dict(content=file_content, )),
            content_type='application/json',
        )
        assert rq.status_code == 200
        assert targetpath in c.get('/api/v1/file').get_json()['files']
        ok_file_has_content(op.join(ds.path, targetpath), content=content)
        count += 1
        ok_clean_git(ds.path)
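Example #1 above, like every example in this collection, exercises the ok_file_has_content assertion helper from datalad's test utilities. For orientation, here is a minimal, self-contained sketch of the helper on its own, assuming it is importable from datalad.tests.utils (as the import in Example #34 shows; newer datalad versions may ship it from a different test-utils module) and using a throwaway file purely for illustration:

import os.path as op
import tempfile

from datalad.tests.utils import ok_file_has_content

with tempfile.TemporaryDirectory() as tmpdir:
    # create a small file to assert on
    fpath = op.join(tmpdir, 'demo.txt')
    with open(fpath, 'w') as f:
        f.write('hello\n')
    # exact content match
    ok_file_has_content(fpath, 'hello\n')
    # tolerate surrounding whitespace, as several examples below do
    ok_file_has_content(fpath, 'hello', strip=True)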
Example #2
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]     # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)
Example #3
def check_compress_file(ext, annex, path, name):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive,
                   path=path)
    assert_true(op.exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest(exc_str(exc))
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')
Example #4
def test_basics(path):
    ds = Dataset(path).create(force=True)
    # TODO: this procedure would leave a clean dataset, but `run` cannot handle dirty
    # input yet, so manual for now
    # V6FACT: this leaves the file staged, but not committed
    ds.add('code', to_git=True)
    # V6FACT: even this leaves it staged
    ds.add('.')
    # V6FACT: but this finally commits it
    ds.save()
    # TODO remove above two lines
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre',
                  'datalad_test_proc',
                  where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])
Example #5
def check_crawl_autoaddtext(gz, ind, topurl, outd):
    ds = create(outd)
    ds.run_procedure("cfg_text2git")
    with chpwd(outd):  # TODO -- dataset argument
        template_kwargs = {
            'url': topurl,
            'a_href_match_': '.*',
        }
        if gz:
            template_kwargs['archives_re'] = r"\.gz$"
        crawl_init(template_kwargs, save=True, template='simple_with_archives')
        try:
            crawl()
        except MissingExternalDependency as exc:
            raise SkipTest(exc_str(exc))
    ok_clean_git(outd)
    ok_file_under_git(outd, "anothertext", annexed=False)
    ok_file_under_git(outd, "d/textfile", annexed=False)
    ok_file_under_git(outd, "d/tooshort", annexed=True)

    if 'compressed.dat.gz' in TEST_TREE2:
        if gz:
            ok_file_under_git(outd, "compressed.dat", annexed=False)
            ok_file_has_content(op.join(outd, "compressed.dat"),
                                u"мама мыла раму")
        else:
            ok_file_under_git(outd, "compressed.dat.gz", annexed=True)
    else:
        raise SkipTest(
            "Need datalad >= 0.11.2 to test .gz files decompression")
Example #6
def test_basics(path, super_path):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    assert_false(ds.repo.is_under_annex("README.md"))
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')
    # commit this procedure config for later use in a clone:
    ds.add(op.join('.datalad', 'config'))
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre',
                  'datalad_test_proc',
                  where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

    # make a fresh dataset:
    super = Dataset(super_path).create()
    # configure dataset to run the demo procedure prior to the clean command
    super.config.add('datalad.clean.proc-pre',
                     'datalad_test_proc',
                     where='dataset')
    # 'super' doesn't know any procedures but should get to know one by
    # installing the above as a subdataset
    super.install('sub', source=ds.path)
    # run command that should trigger the demo procedure
    super.clean()
    # look for traces
    ok_file_has_content(op.join(super.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(super.path, index_modified=[op.join('.datalad', 'config')])
Example #7
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = [
    ]  # _path_(".git/datalad/metadata")]  # metadata only on publish
    # ATM we run post-update hook also upon create since it might
    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)
Example #8
def test_run_no_explicit_dataset(path):
    raise SkipTest('SingularityHub is gone for now')
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.containers_add("deb",
                      url=testimg_url,
                      call_fmt="singularity exec {img} {cmd}")

    # When no explicit dataset is given, paths are interpreted as relative to
    # the current working directory.

    # From top-level directory.
    with chpwd(path):
        containers_run("cat {inputs[0]} {inputs[0]} >doubled",
                       inputs=[op.join("subdir", "in")],
                       outputs=["doubled"])
        ok_file_has_content(op.join(path, "doubled"), "innardsinnards")

    # From under a subdirectory.
    subdir = op.join(ds.path, "subdir")
    with chpwd(subdir):
        containers_run("cat {inputs[0]} {inputs[0]} >doubled",
                       inputs=["in"],
                       outputs=["doubled"])
    ok_file_has_content(op.join(subdir, "doubled"), "innardsinnards")
Example #9
    def test_addurls_url_on_collision_choose(self, path):
        ds = Dataset(path).create(force=True)
        data = deepcopy(self.data)
        for row in data:
            row["name"] = "a"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
        ok_file_has_content(op.join(ds.path, "a-first"),
                            "a content",
                            strip=True)

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
        ok_file_has_content(op.join(ds.path, "a-last"),
                            "c content",
                            strip=True)
Example #10
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using current
    # user's group
    ds.create_sibling(target_url,
                      name=remote,
                      shared='group',
                      group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not have yet/thus have not used an option to record to publish
    # to that sibling by default (e.g. --set-upstream), if we run just ds.publish
    # -- should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message=
        'No target sibling configured for default publication, please specific via --to'
    )
    ds.publish(
        to=remote)  # should be ok, non recursive; BUT it (git or us?) would
    # create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(res,
                        1,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #11
def test_copy_file_recursion(srcdir, destdir):
    src_ds = Dataset(srcdir).create(force=True)
    src_ds.save()
    dest_ds = Dataset(destdir).create()
    copy_file([src_ds.pathobj / 'subdir', dest_ds.pathobj], recursive=True)
    # structure is mirrored
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file1', '123')
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file2', 'abc')
Example #12
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert (knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #13
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True,
                all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Example #14
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Example #15
def test_runner(tempfile):
    runner = Runner()
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)
Example #16
def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.save('1')
    ds.create_sibling('ssh://datalad-test:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
Example #17
def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.add('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.publish(to='target', transfer_data='all')
    assert results
    ok_file_has_content(_path_(desttop, 'subdir/1'), '123')
Example #19
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # since now we base incoming on master -- and there was nothing custom
    # in master after incoming-processed, both branches should be the same
    eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # but that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'), repo.get_hexsha('incoming-processed'))

    get_branch_commits = repo.get_branch_commits_ \
        if hasattr(repo, 'get_branch_commits_') else repo.get_branch_commits
    commits = {b: list(get_branch_commits(b)) for b in branches}
    # all commits out there -- init ds + init crawler + 1*(incoming, processed)
    # The number of commits in master differs based on the create variant used
    # (the one DataLad's master makes only one commit).
    ncommits_master = len(commits["master"])
    assert_in(ncommits_master, [4, 5])
    # incoming branches from master but lacks one merge commit.
    eq_(len(commits['incoming']), ncommits_master - 1)
    # incoming-processed is on master.
    eq_(len(commits['incoming-processed']), ncommits_master)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii', './dir1/file2.nii',
    }

    eq_(set(all_files), target_files)
Example #20
def test_update_git_smoke(src_path, dst_path):
    # Apparently was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True), 1,
        status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #21
def test_update_git_smoke(src_path, dst_path):
    # Apparently was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True), 1,
        action='update', status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #22
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(
        initiate_dataset(template="balsa",
                         dataset_name='dataladtest-WG33',
                         path=outd,
                         data_fields=['dataset_id'])({
                             'dataset_id': 'WG33'
                         }))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    commits = {b: list(repo.get_branch_commits(b)) for b in branches}
    eq_(len(commits['incoming']), 1)
    eq_(len(commits['incoming-processed']), 2)
    eq_(
        len(commits['master']), 6
    )  # all commits out there -- init ds + init crawler + 1*(incoming, processed, merge)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii',
        './dir1/file2.nii',
    }

    eq_(set(all_files), target_files)
Example #23
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert(knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #24
def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    if ds.repo.is_managed_branch():
        raise SkipTest(
            'Skipped due to https://github.com/datalad/datalad/issues/4075')
    ds.save('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
Example #25
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #26
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = Path(src) / 'ds'
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')
Example #27
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using current
    # user's group
    ds.create_sibling(target_url, name=remote, shared='group', group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not have yet/thus have not used an option to record to publish
    # to that sibling by default (e.g. --set-upstream), if we run just ds.publish
    # -- should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    ds.publish(to=remote)  # should be ok, non recursive; BUT it (git or us?) would
                  # create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1,
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the
    eq_(subds.repo.get_preferred_content('wanted', remote), 'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #28
def test_spaces(path):
    """
    Test whether args with spaces are correctly parsed.
    """
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')
    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'with spaces', 'unrelated'])
    # check whether file has name with spaces
    ok_file_has_content(op.join(ds.path, 'with spaces'), 'hello\n')
Example #29
def test_update_volatile_subds(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        status='ok',
                        type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.add(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # grab new content of uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())
Example #30
def test_gin_cloning(path):
    # can we clone a public ds anonymously from gin and retrieve content
    ds = clone('https://gin.g-node.org/datalad/datalad-ci-target', path)
    ok_(ds.is_installed())
    annex_path = op.join('annex', 'two')
    git_path = op.join('git', 'one')
    eq_(ds.repo.file_has_content(annex_path), False)
    eq_(ds.repo.is_under_annex(git_path), False)
    result = ds.get(annex_path)
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], op.join(ds.path, annex_path))
    ok_file_has_content(op.join(ds.path, annex_path), 'two\n')
    ok_file_has_content(op.join(ds.path, git_path), 'one\n')
Example #31
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"),
                        subdir_path,
                        strip=True)

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))
Example #32
def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #34
def test_swallow_logs(logfile):
    lgr = logging.getLogger('datalad')
    with swallow_logs(new_level=9) as cm:
        eq_(cm.out, '')
        lgr.log(8, "very heavy debug")
        eq_(cm.out, '')  # not even visible at level 9
        lgr.log(9, "debug1")
        eq_(cm.out, '[Level 9] debug1\n')  # visible at level 9
        lgr.info("info")
        eq_(cm.out, '[Level 9] debug1\n[INFO] info\n')  # INFO is captured as well
    with swallow_logs(new_level=9, file_=logfile) as cm:
        eq_(cm.out, '')
        lgr.info("next info")
    from datalad.tests.utils import ok_file_has_content
    ok_file_has_content(logfile, "[INFO] next info", strip=True)
Example #36
    def check_dss():
        # we added the remote and set all the
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote), 'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then content should be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #38
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.support.network.DATASETS_TOPURL', topurl):
        ds = clone('///ds',
                   path,
                   result_xfm='datasets',
                   return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #39
def test_balsa_pipeline2(ind, topurl, outd, clonedir):
    list(
        initiate_dataset(template="balsa",
                         dataset_name='dataladtest-WG33',
                         path=outd,
                         data_fields=['dataset_id'])({
                             'dataset_id': 'WG33'
                         }))

    with chpwd(outd):
        with swallow_logs(new_level=logging.WARN) as cml:
            pipeline = ofpipeline('WG33', url=topurl)
            out = run_pipeline(pipeline)
            assert_true(
                'The following files do not exist in the canonical tarball, '
                'but are individually listed files and will not be kept:' in
                cml.out)
            assert_true(
                './file1.nii varies in content from the individually downloaded '
                'file with the same name, it is removed and file from canonical '
                'tarball is kept' in cml.out)
    eq_(len(out), 1)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/config',
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './file1.nii',
        './dir1/file2.nii',
    }

    eq_(set(all_files), target_files)
Example #40
def test_copy_file_into_nonannex(workdir):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', check=False)

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
Example #41
def test_inputs_quotes_needed(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    cmd = "import sys; open(sys.argv[-1], 'w').write('!'.join(sys.argv[1:]))"
    # The string form of a command works fine when the inputs/outputs have
    # spaces ...
    cmd_str = "{} -c \"{}\" {{inputs}} {{outputs[0]}}".format(
        sys.executable, cmd)
    ds.run(cmd_str, inputs=["*.t*"], outputs=["out0"], expand="inputs")
    expected = u"!".join(
        list(sorted([OBSCURE_FILENAME + u".t", "bar.txt", "foo blah.txt"])) +
        ["out0"])
    with open(op.join(path, "out0")) as ifh:
        eq_(assure_unicode(ifh.read()), expected)
    # ... but the list form of a command does not. (Don't test this failure
    # with the obscure file name because we'd need to know its composition to
    # predict the failure.)
    cmd_list = [sys.executable, "-c", cmd, "{inputs}", "{outputs[0]}"]
    ds.run(cmd_list, inputs=["*.txt"], outputs=["out0"])
    ok_file_has_content(op.join(path, "out0"), "bar.txt foo!blah.txt!out0")
Example #42
def test_basics(path, super_path):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    assert_false(ds.repo.is_under_annex("README.md"))
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # commit this procedure config for later use in a clone:
    ds.add(op.join('.datalad', 'config'))
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

    # make a fresh dataset:
    super = Dataset(super_path).create()
    # configure dataset to run the demo procedure prior to the clean command
    super.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # 'super' doesn't know any procedures but should get to know one by
    # installing the above as a subdataset
    super.install('sub', source=ds.path)
    # run command that should trigger the demo procedure
    super.clean()
    # look for traces
    ok_file_has_content(op.join(super.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(super.path, index_modified=[op.join('.datalad', 'config')])
Example #43
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]     # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    hook_path = _path_(target_path, '.git/hooks/post-update')
    # No longer the case -- we are no longer using absolute path in the
    # script
    # ok_file_has_content(hook_path,
    #                     '.*\ndsdir="%s"\n.*' % target_path,
    #                     re_=True,
    #                     flags=re.DOTALL)
    # No absolute path (so dataset could be moved) in the hook
    with open(hook_path) as f:
        assert_not_in(target_path, f.read())
    # correct ls_json command in hook content (path wrapped in "quotes)
    ok_file_has_content(hook_path,
                        '.*datalad ls -a --json file \..*',
                        re_=True,
                        flags=re.DOTALL)
Example #44
def test_create_tree(path):
    content = u"мама мыла раму"
    create_tree(path, OrderedDict([
        ('1', content),
        ('sd', OrderedDict(
            [
            # right away an obscure case where we have both 1 and 1.gz
                ('1', content*2),
                ('1.gz', content*3),
            ]
        )),
    ]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content*2)
    ok_file_has_content(op.join(path, 'sd', '1.gz'), content*3, decompress=True)
Example #45
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #46
def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" {{mysub}} {args}',
        where='dataset'
    )
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset'
    )
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" local {args}',
        where='local'
    )
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset'
    )

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
Example #47
def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1, status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # grab new content of uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    ok_clean_git(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2, status='ok', type='dataset')
    ok_clean_git(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now-disconnected subdataset for further tests;
    # not using a bound method and not giving a parentds -- neither should
    # be needed to get a clean dataset
    remove(op.join(ds.path, sname), check=False)
    ok_clean_git(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    ok_clean_git(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(
        res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    ok_clean_git(ds.path)
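
The core pattern this test exercises: install a clone, let the origin gain subdatasets and content, and propagate everything with update(merge=True, recursive=True). A minimal sketch using only calls that appear above; the paths and the subdataset name are hypothetical.

from datalad.api import Dataset, install

origin = Dataset('/tmp/up-origin').create()
clone = install(source=origin.path, path='/tmp/up-clone',
                result_xfm='datasets', return_type='item-or-list')
origin.create('subds')                     # a new subdataset appears at the origin
clone.update(merge=True, recursive=True)   # the clone now knows about it
clone.get('subds')                         # install it (and fetch content) on demand
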
Beispiel #48
0
def test_something(path, new_home):
    # read nothing, has nothing
    cfg = ConfigManager(dataset_only=True)
    assert_false(len(cfg))
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), dataset_only=True)
    assert_equal(len(cfg), 3)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'], ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')), ['myint', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'), ['findme'])

    assert_equal(
        sorted(cfg.items()),
        [(u'onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(
        cfg.get('something.user'),
        ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'), cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of the section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes -- with reload=False the addition is not read back yet
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ',
                    {'HOME': new_home, 'DATALAD_SNEAKY_ADDITION': 'ignore'}):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert(not exists(global_gitconfig))
        globalcfg = ConfigManager(dataset_only=False)
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert(exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # the manager does not monitor additions on its own, hence reload
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset,
                'datalad.unittest.youcan',
                where='local')
        assert(globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        if external_versions['cmd:git'] < '2.18':
            # older git versions leave an empty section behind in the file
            ok_file_has_content(global_gitconfig, '[datalad "unittest"]', strip=True)
            # remove_section to clean it up entirely
            globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')),
        dataset_only=True,
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
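
For reference, the read/write calls exercised above in isolation. A minimal sketch; it assumes datalad.config.ConfigManager as the import path and uses throwaway paths and key names.

from datalad.api import Dataset
from datalad.config import ConfigManager

ds = Dataset('/tmp/cfg-demo').create()
cfg = ConfigManager(ds, dataset_only=True)             # read only the dataset's own config
cfg.add('team.member', 'jane', where='dataset')        # add() appends, keys may be multi-valued
cfg.add('team.member', 'john', where='dataset')
assert len(cfg['team.member']) == 2
cfg.set('team.lead', 'jane', where='dataset')           # set() writes a single value
cfg.set('team.member', 'jane', where='dataset',
        force=True)                                     # force=True replaces all matching values
cfg.unset('team.lead', where='dataset')
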
Beispiel #49
0
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path,
                        strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path,
                        strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"],
        "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in >c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path),
                  script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path),
                  script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())
Beispiel #50
0
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible', source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    ok_clean_git(src_path)

    # fail when asked to update a non-dataset
    assert_status(
        'impossible',
        source.update("update.txt", on_failure='ignore'))
    # fail when asked to update something non-existent
    assert_status(
        'impossible',
        source.update("nothere", on_failure='ignore'))

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test that recursive update doesn't fail if a submodule is removed,
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2,
            status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
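
In short: update() only fetches, so remote changes sit on the remote-tracking branch until update(merge=True) brings them into the active branch, and annexed content still has to be obtained separately. A sketch of that sequence, reusing the `dest` clone from this test:

dest.update()                # fetch only: 'update.txt' not yet in the active branch
dest.update(merge=True)      # merge: the file appears, but only as an annexed pointer
dest.get('update.txt')       # fetch its content
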
Beispiel #51
0
def test_publish_depends(
        origin,
        src_path,
        target1_path,
        target2_path,
        target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails when an unknown sibling is given as a publication dependency
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=(
            'unknown sibling(s) specified as publication dependency: %s',
            set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but not associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
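
The dependency mechanism boils down to one config entry per sibling, remote.<name>.datalad-publish-depends, written by create_sibling(..., publish_depends=...). A minimal sketch, given some dataset `ds` with annexed content; the host, paths, sibling names, and file name are hypothetical.

# 'backup' is a plain sibling; 'public' declares it as a publication dependency
ds.create_sibling('ssh://example-host/srv/backup', name='backup')
ds.create_sibling('ssh://example-host/srv/public', name='public',
                  publish_depends='backup')
# recorded as remote.public.datalad-publish-depends
assert ds.config.get('remote.public.datalad-publish-depends') == 'backup'
# publishing to 'public' first publishes to 'backup'
ds.publish(to='public')
# naming a path additionally copies the annexed content
ds.publish('probe1.dat', to='public')
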
Beispiel #52
0
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a",
                                     "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Relative inputs are resolved against the subdirectory of the dataset one runs from.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
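
To summarize the input/output handling demonstrated here: --input paths (globs included) are retrieved before the command runs, --output paths are unlocked or removed so the command can write them, and expand="both" records the expanded lists instead of the raw globs. A minimal sketch, assuming an installed dataset `ds` whose annexed *.dat files are not present locally:

# inputs are fetched first; existing outputs are unlocked (or removed if content is absent)
ds.run("cat {inputs} >merged.txt", inputs=["*.dat"], outputs=["merged.txt"])
# record the expanded file lists instead of the raw globs in the run record
ds.run("touch marker", inputs=["a.*"], outputs=["b.*"], expand="both")
# a later rerun re-fetches the recorded inputs before re-executing
ds.rerun()
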