Example 1
def test_add_recursive(path=None):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True, jobs=5)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=op.join(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
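
Every example in this listing exercises the same result-assertion helper: it receives a list of result records (dicts) plus an expected count, and checks how many records match the given key/value constraints. As a rough orientation, here is a minimal sketch of that behaviour, inferred purely from the calls shown in these examples; it is an illustrative approximation, not DataLad's actual implementation, and the error message wording is invented.

def assert_result_count(results, n, **kwargs):
    # Count result records whose fields match all given constraints and
    # assert that exactly `n` of them do (sketch only, not DataLad's code).
    matches = [
        r for r in results
        if all(r.get(k) == v for k, v in kwargs.items())
    ]
    assert len(matches) == n, \
        "expected %d result(s) matching %r, got %d" % (n, kwargs, len(matches))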
Example 2
def test_invalid_call(path=None):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs an SSH URL
        assert_raises(InsufficientArgumentsError, create_sibling, '')
        assert_raises(ValueError, create_sibling, 'http://ignore.me')
        # needs an actual dataset
        assert_raises(ValueError,
                      create_sibling,
                      'datalad-test:/tmp/somewhere',
                      dataset='/nothere')
    # pre-configure a bogus remote
    ds = Dataset(path).create()
    ds.repo.add_remote('bogus', 'http://bogus.url.com')
    # fails to reconfigure by default, both with a generated sibling name
    # and when explicitly given an existing name
    for res in (ds.create_sibling('bogus:/tmp/somewhere', on_failure='ignore'),
                ds.create_sibling('datalad-test:/tmp/somewhere',
                                  name='bogus',
                                  on_failure='ignore')):
        assert_result_count(
            res,
            1,
            status='error',
            message=
            ("sibling '%s' already configured (specify alternative name, or force reconfiguration via --existing",
             'bogus'))

    if not have_webui():
        # need an extension package
        assert_raises(RuntimeError, ds.create_sibling, '', ui=True)
Example 3
    def check_addurls_from_key(self, key_arg, expected_backend, fake_dates,
                               path):
        ds = Dataset(path).create(force=True, fake_dates=fake_dates)
        if OLD_EXAMINEKEY and ds.repo.is_managed_branch():
            raise SkipTest("Adjusted branch functionality requires "
                           "more recent `git annex examinekey`")
        ds.addurls(self.json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="*",
                   key=key_arg,
                   result_renderer='disabled')
        repo = ds.repo
        repo_path = ds.repo.pathobj
        paths = [repo_path / x for x in "ac"]

        annexinfo = repo.get_content_annexinfo(eval_availability=True)
        for path in paths:
            pstat = annexinfo[path]
            eq_(pstat["backend"], expected_backend)
            assert_false(pstat["has_content"])

        get_res = ds.get(paths,
                         result_renderer='disabled',
                         on_failure="ignore")
        assert_result_count(get_res, 2, action="get", status="ok")
Example 4
def test_get_mixed_hierarchy(src=None, path=None):

    origin = Dataset(src).create(annex=False)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.save('file_in_git.txt', to_git=True)
    origin_sub.save('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(path,
                        source=src,
                        recursive=True,
                        result_xfm='datasets',
                        return_type='item-or-list',
                        result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(result,
                        1,
                        path=opj(subds.path, "file_in_annex.txt"),
                        status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example 5
def test_no_worktree_impact_false_deletions(path=None):
    ds = Dataset(path).create()
    # create a branch that has no new content
    ds.repo.call_git(['checkout', '-b', 'test'])
    # place two successive commits with file additions into the default branch
    ds.repo.call_git(['checkout', DEFAULT_BRANCH])
    (ds.pathobj / 'identical').write_text('should be')
    ds.save()
    (ds.pathobj / 'new').write_text('yes')
    ds.save()
    # now perform a diff for the last commit; there is one file that remained
    # identical
    ds.repo.call_git(['checkout', 'test'])
    res = ds.diff(fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH,
                  result_renderer='disabled')
    # under no circumstances can there be any reports on deleted files
    # because we never deleted anything
    assert_result_count(res, 0, state='deleted')
    # the identical file must be reported clean
    assert_result_count(
        res,
        1,
        state='clean',
        path=str(ds.pathobj / 'identical'),
    )
Example 6
def test_install_recursive_with_data(src=None, path=None):

    _make_dataset_hierarchy(src)

    # now install again, this time with data:
    res = install(path,
                  source=src,
                  recursive=True,
                  get_data=True,
                  result_filter=None,
                  result_xfm=None)
    assert_status('ok', res)
    # installed a dataset and two subdatasets, and one file with content in
    # each
    assert_result_count(res, 5, type='dataset', action='install')
    assert_result_count(res, 2, type='file', action='get')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path)
    top_ds = YieldDatasets()(res[0])
    ok_(top_ds.is_installed())

    def all_have_content(repo):
        ainfo = repo.get_content_annexinfo(init=None, eval_availability=True)
        return all(st["has_content"] for st in ainfo.values())

    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all_have_content(top_ds.repo))

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds, ))
        if isinstance(subds.repo, AnnexRepo):
            ok_(all_have_content(subds.repo))
Example 7
def test_gh1426(origin_path=None, target_path=None):
    # set up a pair of repos, one the published copy of the other
    origin = Dataset(origin_path).create()
    target = mk_push_target(origin,
                            'target',
                            target_path,
                            annex=True,
                            bare=False)
    origin.push(to='target')
    assert_repo_status(origin.path)
    assert_repo_status(target.path)
    eq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    assert_repo_status(origin.path)
    neq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
         target.get_hexsha(DEFAULT_BRANCH))
    # now push
    res = origin.push(to='target')
    assert_result_count(res,
                        1,
                        status='ok',
                        type='dataset',
                        path=origin.path,
                        action='publish',
                        target='target',
                        operations=['fast-forward'])
    eq_(origin.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))
Example 8
def test_add_files(path=None):
    ds = Dataset(path).create(force=True)

    test_list_1 = ['test_annex.txt']
    test_list_2 = ['test.txt']
    test_list_3 = ['test1.dat', 'test2.dat']
    test_list_4 = [
        op.join('dir', 'testindir'),
        op.join('dir', OBSCURE_FILENAME)
    ]

    for arg in [(test_list_1[0], False), (test_list_2[0], True),
                (test_list_3, False), (test_list_4, False)]:
        # special case 4: give the dir:
        if arg[0] == test_list_4:
            result = ds.save('dir', to_git=arg[1])
            status = ds.repo.get_content_annexinfo(['dir'])
        else:
            result = ds.save(arg[0], to_git=arg[1])
            for a in ensure_list(arg[0]):
                assert_result_count(result, 1, path=str(ds.pathobj / a))
            status = ds.repo.get_content_annexinfo(
                ut.Path(p) for p in ensure_list(arg[0]))
        for f, p in status.items():
            if arg[1]:
                assert p.get('key', None) is None, f
            else:
                assert p.get('key', None) is not None, f
Example 9
def test_get_subdataset_direct_fetch(path=None):
    path = Path(path)
    origin = Dataset(path / "origin").create()
    for sub in ["s0", "s1"]:
        sds = origin.create(origin.pathobj / sub)
        sds.repo.commit(msg="another commit", options=["--allow-empty"])
    origin.save()
    s0 = Dataset(origin.pathobj / "s0")
    s1 = Dataset(origin.pathobj / "s1")
    # Abandon the recorded commit so that it needs to be brought down by a
    # direct fetch.
    s0.repo.call_git(["reset", "--hard", "HEAD~"])
    s1.repo.call_git(["reset", "--hard", "HEAD~"])

    # Tweak the configuration of s0 to make the direct fetch fail.
    # Disallow direct oid fetch (default).
    s0.repo.config.set("uploadpack.allowAnySHA1InWant", "false", scope="local")
    # Configure the fetcher to avoid v2, which allows fetching unadvertised
    # objects regardless of the value of uploadpack.allowAnySHA1InWant.
    s0.repo.config.set("protocol.version", "0", scope="local")

    # Configure s1 to succeed with direct fetch.
    s1.repo.config.set("uploadpack.allowAnySHA1InWant", "true", scope="local")

    clone = install(str(path / "clone"),
                    source="ssh://datalad-test:" +
                    origin.repo.pathobj.as_posix())

    res = clone.get(["s0", "s1"], on_failure="ignore")
    assert_result_count(res,
                        1,
                        action="install",
                        type="dataset",
                        status="error")
    assert_result_count(res, 1, action="install", type="dataset", status="ok")
Example 10
def test_gh3356(src=None, path=None):
    # create toy version of gh-3356 scenario
    origin = Dataset(src).create()
    origin_sub = origin.create(origin.pathobj / 'subdir' / 'subds')
    for p in ((origin_sub.pathobj / 'data' / 'file_in_annex.txt'),
              (origin_sub.pathobj / 'data' / 'file_in_annex2.txt')):
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(p.name)
    origin.save(recursive=True)
    clone = install(path,
                    source=src,
                    result_xfm='datasets',
                    return_type='item-or-list')
    targetpaths = [
        opj('subdir', 'subds', 'data', 'file_in_annex.txt'),
        opj('subdir', 'subds', 'data', 'file_in_annex2.txt'),
    ]
    with chpwd(path):
        res = get(targetpaths)
    # get() must report success on two files
    assert_result_count(res, 2, action='get', type='file', status='ok')
    # status must report content for two files
    assert_result_count(clone.status(recursive=True, annex='all'),
                        2,
                        action='status',
                        has_content=True)
Example 11
def test_get_in_unavailable_subdataset(src=None, path=None):
    _make_dataset_hierarchy(src)
    root = install(path,
                   source=src,
                   result_xfm='datasets',
                   return_type='item-or-list')
    targetpath = opj('sub1', 'sub2')
    targetabspath = opj(root.path, targetpath)
    with chpwd(path):
        res = get(targetabspath)
    assert_result_count(res, 2, status='ok', action='install', type='dataset')
    # dry-fit result filter that only returns the result that matched the requested
    # path
    filtered = [r for r in res if only_matching_paths(r, path=targetabspath)]
    assert_result_count(filtered,
                        1,
                        status='ok',
                        action='install',
                        type='dataset',
                        path=targetabspath)
    # we got the dataset, and its immediate content, but nothing below
    sub2 = Dataset(targetabspath)
    ok_(sub2.is_installed())
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not Dataset(opj(targetabspath, 'sub3')).is_installed())
Example 12
def test_install_skip_failed_recursive(src=None, path=None):
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it is result [1] since it comes
        # after the '2' submodule
        assert_in_results(
            result,
            status='error',
            path=sub1.path,
            type='dataset',
            message='target path already exists and not empty, refuse to '
            'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
Example 13
def test_audio(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', scope='branch')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
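    # `target` is assumed to be a module-level dict of expected metadata
    # values for the test audio file, defined outside this excerpt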
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(reporton='datasets', return_type='item-or-list'
                          )['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence it gets excluded from the unique report
    assert_in('date', meta)
    assert (not meta['date'])
    assert_not_in('date', uniques['audio'])
Example 14
def test_download_url_existing_dir_no_slash_exception(path=None):
    with chpwd(path):
        res = download_url('url', path="dir", save=False, on_failure='ignore')
        assert_result_count(res, 1, status='error')
        assert_message(
            "Non-directory path given (no trailing separator) "
            "but a directory with that name (after adding "
            "archive suffix) exists", res)
Example 15
def test_no_store(path=None):
    ds = Dataset(path).create()
    # check that we fail without '--new-store-ok' when there is no store
    assert_result_count(ds.create_sibling_ria("'ria+file:///no/where'",
                                              "datastore",
                                              on_failure='ignore'),
                        1,
                        status="error")
Example 16
def test_newthings_coming_down(originpath=None, destpath=None):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in(DEFAULT_REMOTE, ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert (knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    ok_(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    assert_false(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example 17
def test_update_fetch_all(path=None):
    path = Path(path)
    remote_1 = str(path / "remote_1")
    remote_2 = str(path / "remote_2")

    ds = Dataset(path / "src").create()
    src = ds.repo.path

    ds_rmt1 = clone(source=src, path=remote_1)
    ds_rmt2 = clone(source=src, path=remote_2)

    ds.siblings('add', name="sibling_1", url=remote_1)
    ds.siblings('add', name="sibling_2", url=remote_2)

    # modify the remotes:
    (ds_rmt1.pathobj / "first.txt").write_text("some file load")
    ds_rmt1.save()

    # TODO: Modify an already present file!

    (ds_rmt2.pathobj / "second.txt").write_text("different file load")
    ds_rmt2.save()

    # Let's init some special remote which we couldn't really update/fetch
    if not dl_cfg.get('datalad.tests.dataladremote'):
        ds.repo.init_remote(
            'datalad',
            ['encryption=none', 'type=external', 'externaltype=datalad'])
    # fetch all remotes
    assert_result_count(ds.update(), 1, status='ok', type='dataset')

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/" + DEFAULT_BRANCH))
    assert_in("second.txt", ds.repo.get_files("sibling_2/" + DEFAULT_BRANCH))

    # no merge strategy for multiple remotes yet:
    # more clever now, there is a tracking branch that provides a remote
    #assert_raises(NotImplementedError, ds.update, merge=True)

    # merge a certain remote:
    assert_result_count(ds.update(sibling='sibling_1', merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')

    # changes from sibling_2 still not present:
    assert_not_in("second.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = ds.repo.get_file_annexinfo("first.txt",
                                            eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])
Example 18
def test_update_how_subds_different(path=None, *, follow, action):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_src_sub = ds_src.create("sub")
    ds_src.save()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    (ds_clone.pathobj / "foo").write_text("foo")
    ds_clone.save()
    ds_clone_sub = Dataset(ds_clone.pathobj / "sub")

    (ds_src_sub.pathobj / "bar").write_text("bar")
    ds_src.save(recursive=True)

    # Add unrecorded state to make --follow=sibling/parentds differ.
    (ds_src_sub.pathobj / "baz").write_text("baz")
    ds_src_sub.save()

    ds_clone_repo = ds_clone.repo
    ds_clone_hexsha_pre = ds_clone_repo.get_hexsha()

    ds_clone_sub_repo = ds_clone_sub.repo
    ds_clone_sub_branch_pre = ds_clone_sub_repo.get_active_branch()

    res = ds_clone.update(follow=follow,
                          how="merge",
                          how_subds=action,
                          recursive=True)

    assert_result_count(res,
                        1,
                        action="merge",
                        status="ok",
                        path=ds_clone.path)
    assert_result_count(res,
                        1,
                        action=f"update.{action}",
                        status="ok",
                        path=ds_clone_sub.path)

    ds_clone_hexsha_post = ds_clone_repo.get_hexsha()
    neq_(ds_clone_hexsha_pre, ds_clone_hexsha_post)
    neq_(ds_src.repo.get_hexsha(), ds_clone_hexsha_post)
    ok_(ds_clone_repo.is_ancestor(ds_clone_hexsha_pre, ds_clone_hexsha_post))

    eq_(ds_clone_sub.repo.get_hexsha(),
        ds_src_sub.repo.get_hexsha(None if follow == "sibling" else "HEAD~"))
    ds_clone_sub_branch_post = ds_clone_sub_repo.get_active_branch()

    if action == "checkout":
        neq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)
        assert_false(ds_clone_sub_branch_post)
    else:
        eq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)
Example 19
def test_diff_nonexistent_ref_unicode(path=None):
    ds = Dataset(path).create()
    assert_result_count(ds.diff(fr="HEAD",
                                to=u"β",
                                on_failure="ignore",
                                result_renderer='disabled'),
                        1,
                        path=ds.path,
                        status="impossible")
Example 20
def test_file_extraction(path=None):
    skip_if_no_module('libxmp')

    # go into virgin dir to avoid detection of any dataset
    with chpwd(path):
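        # `testpath` is assumed to be a module-level path to a sample file
        # carrying XMP metadata, defined outside this excerpt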
        res = extract_metadata(
            types=['xmp'],
            files=[testpath])
        assert_result_count(res, 1, type='file', status='ok', action='metadata', path=testpath)
        assert_in('xmp', res[0]['metadata'])
Example 21
def test_ephemeral(ds_path=None, store_path=None, clone_path=None):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()),
                          "riastore",
                          new_store_ok=True)
    ds.push(to="riastore", data="anything")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id),
                      clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(store / ds.id[:3] / ds.id[3:] /
                                               'annex'))
    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet
        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True, file=file_test.as_posix())
        assert_result_count(res, 1, success=True, file=file_testsub.as_posix())

        # push back git history
        eph_clone.push(to=DEFAULT_REMOTE, data="nothing")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")
Example 22
def test_get_relays_command_errors(path=None):
    ds = Dataset(path).create()
    (ds.pathobj / "foo").write_text("foo")
    ds.save()
    ds.drop("foo", reckless='kill')
    assert_result_count(ds.get("foo",
                               on_failure="ignore",
                               result_renderer='disabled'),
                        1,
                        action="get",
                        type="file",
                        status="error")
Example 23
def test_update_strategy(path=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('impossible',
                      ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('ok',
                      ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))
Example 24
def test_download_url_return(toppath=None, topurl=None, outdir=None):
    # Ensure that out directory has trailing slash.
    outdir = opj(outdir, "")
    files = ['file1.txt', 'file2.txt']
    urls = [topurl + f for f in files]
    outfiles = [opj(outdir, f) for f in files]

    out1 = download_url(urls[0], path=outdir, save=False)
    assert_result_count(out1, 1)
    eq_(out1[0]['path'], outfiles[0])

    # can't overwrite
    out2 = download_url(urls, path=outdir, on_failure='ignore', save=False)
    assert_result_count(out2, 1, status='error')
    assert_in('file1.txt already exists', out2[0]['message'])
    assert_result_count(out2, 1, status='ok')  # only 2nd one
    eq_(out2[1]['path'], outfiles[1])

    out3 = download_url(urls,
                        path=outdir,
                        overwrite=True,
                        on_failure='ignore',
                        save=False)
    assert_result_count(out3, 2, status='ok')
    eq_([r['path'] for r in out3], outfiles)
Example 25
def test_ds_extraction(path=None):
    skip_if_no_module('libxmp')

    ds = Dataset(path).create()
    copy(testpath, path)
    ds.save()
    assert_repo_status(ds.path)

    res = extract_metadata(
        types=['xmp'],
        dataset=ds,
        # artificially disable extraction from any file in the dataset
        files=[])
    assert_result_count(
        res, 1,
        type='dataset', status='ok', action='metadata', path=path, refds=ds.path)
    assert_in('xmp', res[0]['metadata'])

    # now the more useful case: getting everything for xmp from a dataset
    res = extract_metadata(
        types=['xmp'],
        dataset=ds)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        type='dataset', status='ok', action='metadata', path=path, refds=ds.path)
    assert_result_count(
        res, 1,
        type='file', status='ok', action='metadata', path=opj(path, 'xmp.pdf'),
        parentds=ds.path)
    for r in res:
        assert_in('xmp', r['metadata'])
Example 26
    def test_drop_after(self=None, path=None):
        ds = Dataset(path).create(force=True)
        ds.repo.set_gitattributes([('a*', {'annex.largefiles': 'nothing'})])
        # make some files go to git, so we can test that we do not blow up
        # while trying to drop what is in git, not annex
        res = ds.addurls(self.json_file,
                         '{url}',
                         '{name}',
                         drop_after=True,
                         result_renderer='disabled')

        assert_result_count(res, 3, action='addurl',
                            status='ok')  # a, b, c  even if a goes to git
        assert_result_count(res, 2, action='drop', status='ok')  # b, c
Example 27
def test_archive(path=None):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert (isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # really sleep, so that if the implementation ever switches to time.time()
    # we would notice
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad so this test is not
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', reckless='kill')
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
Example 28
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = relpath(ores['path'], ores['refds'])
        cres = compds.metadata(
            rpath,
            reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            for i in ('dsid', ):
                eq_(ores[i], cres[i])
Example 29
def test_push_git_annex_branch_many_paths_same_data(path=None):
    path = Path(path)
    ds = Dataset(path / "ds").create(force=True)
    ds.save()
    mk_push_target(ds, "target", str(path / "target"), annex=True, bare=False)
    nbytes = sum(
        ds.repo.get_content_annexinfo(paths=[f])[f]["bytesize"] for f in [
            ds.repo.pathobj / "f0", ds.repo.pathobj / "f3", ds.repo.pathobj /
            "f4"
        ])
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = ds.push(to="target")
    assert_in("{} bytes of annex data".format(nbytes), cml.out)
    # 3 files point to content already covered by another file.
    assert_result_count(res, 3, action="copy", type="file", status="notneeded")
Example 30
def test_update_git_smoke(src_path=None, dst_path=None):
    # Apparently this was just failing on git repos for basic lack of coverage,
    # hence this quick test
    ds = Dataset(src_path).create(annex=False)
    target = install(dst_path,
                     source=src_path,
                     result_xfm='datasets',
                     return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(target.update(recursive=True, merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')