Example #1
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
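The test above covers both a fresh clone and an idempotent re-install. A minimal sketch of the same round trip outside the test harness; the target path and source URL are placeholders, not from the test suite:

from datalad.api import install

SOURCE = 'https://example.com/some/dataset'  # placeholder URL
ds = install(path='/tmp/myclone', source=SOURCE)
assert ds.is_installed()

# a second install of the same target is a no-op; raw result records
# (result_xfm=None) expose the 'notneeded' status asserted above
res = install(path='/tmp/myclone', source=SOURCE,
              result_xfm=None, return_type='list')
assert any(r.get('status') == 'notneeded' for r in res)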
Example #2
def fetch_data(tmpdir, dicoms):
    """Fetches some test DICOMs using datalad"""
    data = os.path.join(tmpdir, 'data')
    api.install(path=data, source=DICOM_DIR)
    data = os.path.join(data, dicoms)
    api.get(path=data)
    return data
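This helper leans on DataLad's split between cloning (install, which brings metadata and file stubs) and content retrieval (get). A hedged sketch of that split; the working path is a placeholder and the URL is borrowed from a later example:

import os
from datalad import api

SRC = 'http://datasets-tests.datalad.org/dbic/QA'  # see Example #4
data = os.path.join('/tmp/work', 'data')
api.install(path=data, source=SRC)               # clone: no annexed content yet
api.get(path=os.path.join(data, 'sourcedata'))   # fetch content on demand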
Example #3
def test_recurse_existing(src, path):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        datasets = assure_list(
            install(path, source=src, recursive=True, recursion_limit=depth))
        # we expect one dataset per level
        eq_(len(datasets), depth + 1)
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(path, source=src, recursive=True, recursion_limit=2)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())
    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    eq_(len(files), 1)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())
    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(curdir, recursive=True, get_data=False)
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)
    # aaannd all data
    files = root.get(curdir, recursive=True)
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)
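A sketch of the recursion controls this test exercises, assuming SRC points at a dataset with nested subdatasets (all paths are placeholders):

from datalad.api import install

SRC = '/tmp/origin-hierarchy'  # placeholder nested hierarchy
# install the top level plus two levels of subdatasets, without file content
root = install('/tmp/clone', source=SRC, recursive=True, recursion_limit=2,
               result_xfm='datasets', return_type='list')[0]
# fetch content only within already-installed datasets
root.get('.', recursive=True, recursion_limit='existing')
# install any remaining subdatasets, still without data
root.get('.', recursive=True, get_data=False)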
Example #4
def fetch_data(tmpdir, subject):
    """Fetches some test dicoms using datalad"""
    from datalad import api
    targetdir = op.join(tmpdir, 'QA')
    api.install(path=targetdir, source='http://datasets-tests.datalad.org/dbic/QA')
    api.get('{}/sourcedata/{}'.format(targetdir, subject))
    return targetdir
Example #5
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as the log level while
            # swallowing, check instead that no ERROR appeared and that no
            # exit code message was emitted -- test both
            assert_not_in('ERROR', cml.out)
            # the check below must not fail on its own! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
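'///' is DataLad's shorthand for the datasets.datalad.org superdataset, and '///crcns' names one of its registered subdatasets. A sketch of the same moves with a placeholder target path (network access required):

from datalad.api import install

superds = install('/tmp/super', source='///')  # clone the superdataset
crcns = superds.install('///crcns')            # then one subdataset by /// path
assert crcns.is_installed()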
Example #6
def test_implicit_install(src, dst):

    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install, source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False, result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
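A sketch of the implicit installation shown above: asking get for a deep path installs the intermediate subdatasets along the way. It assumes the current working directory is an installed dataset that registers a 'sub/subsub' hierarchy (names mirror the test):

from os.path import join as opj
from datalad.api import get

# get_data=False skips file content; result_xfm='datasets' yields the
# Dataset objects that were installed implicitly
installed = get(path=opj('sub', 'subsub'), get_data=False,
                result_xfm='datasets')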
Example #7
def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset), we
    # should not get a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine
Example #8
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install('ds1', source=src)
        ds2 = install('ds2', source=src)
        results = get([opj('ds1', 'test-annex.dat')] + glob(opj('ds2', '*.dat')))
        # each ds has one file
        eq_(len(results), 2)
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
Example #9
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test that recursive update doesn't fail if a submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
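A compact sketch of the fetch-versus-merge distinction this test walks through; both paths are placeholders and '/tmp/source' must already be a dataset:

from datalad.api import install

dest = install('/tmp/dest', source='/tmp/source')
dest.update()            # fetch only: the active branch is left untouched
dest.update(merge=True)  # fetch and merge into the active branch
# annexed content is still fetched separately, e.g.:
dest.get('subm 2/load.dat')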
Example #10
def test_install_from_tilda(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    orelpath = os.path.join(
        '~',
        os.path.relpath(opath, os.path.expanduser('~'))
    )
    assert orelpath.startswith('~')  # just to make sure no normalization
    install(tpath, source=orelpath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()
Example #11
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # get a clone to update later on:
    dest = install(path=dst_path, source=src_path, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in dest.get_dataset_handles(recursive=True):
        AnnexRepo(opj(dst_path, subds), init=True,
                  create=True).git_checkout("master")
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.install(path="update.txt")
    source.remember_state("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.git_get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))
Example #12
def test_install_consistent_state(src, dest, dest2, dest3):
    # if we install a dataset whose sub-dataset "went ahead" on its branch
    # while the super-dataset was not yet updated (e.g. we installed the super
    # before), then it is desirable for the default installed branch to end up
    # at the position the previous location was pointing to.
    # This is a mere heuristic whose assumption might not hold in some cases,
    # but it works for the most simple, and thus most common, ones
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(Dataset, ds.subdatasets(recursive=True, fulfilled=True,
                                        result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in the
            # "version" pointed to by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic,
    # but the same story should hold: we should install the version pointed
    # to by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
Example #13
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install(
            'ds1', source=src,
            result_xfm='datasets', return_type='item-or-list')
        ds2 = install(
            'ds2', source=src,
            result_xfm='datasets', return_type='item-or-list')
        results = get([opj('ds1', 'test-annex.dat')] + glob(opj('ds2', '*.dat')))
        # each ds has one file
        assert_result_count(results, 2, type='file', action='get', status='ok')
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
Example #14
def test_install_subds_with_space(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    # works even now, boring
    # install(tpath, source=opath, recursive=True)
    if on_windows:
        # on windows we cannot simply prepend localhost: to a path
        # and get a working sshurl...
        install(tpath, source=opath, recursive=True)
    else:
        # do via ssh!
        install(tpath, source="localhost:" + opath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()
Example #15
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible' status for both invalid args
    # but all the other tasks have been accomplished
    for skipped, msg in [(opj(ds.path, 'not_existing'), "path does not exist"),
                         (path_outside, "path not associated with any dataset")]:
        assert_result_count(
            result, 1, status='impossible', message=msg, path=skipped)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # the return of get is always a list by default, even if just one thing
    # was gotten; in this case 'subm 1' was already obtained above, so this
    # will get the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
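A sketch of the two failure modes shown above: with on_failure='ignore', invalid paths become 'impossible' result records, while the default on_failure raises IncompleteResultsError. Paths are placeholders for a clone like the one in the test:

from datalad.api import install
from datalad.support.exceptions import IncompleteResultsError

ds = install('/tmp/clone', source='/tmp/origin')  # placeholder paths
res = ds.install(path=['subm 1', 'not_existing'], get_data=False,
                 on_failure='ignore', result_xfm=None, return_type='list')
failed = [r for r in res if r['status'] == 'impossible']

try:
    ds.install(path=['subm 1', 'not_existing'])  # default: raise on failure
except IncompleteResultsError as e:
    print(e)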
Example #16
def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #17
def test_install_list(path, top_path):

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore, install the toplevel first:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
Example #18
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #19
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
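The fulfilled flag of subdatasets() is the bookkeeping this test checks: known-but-uninstalled versus installed. A minimal sketch with a placeholder dataset path:

from datalad.api import Dataset

ds = Dataset('/tmp/myclone')  # placeholder: an installed dataset
missing = ds.subdatasets(fulfilled=False, result_xfm='relpaths')
present = ds.subdatasets(fulfilled=True, result_xfm='relpaths')
if missing:
    ds.install(missing)  # install all known-but-uninstalled subdatasets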
Example #20
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        for skipped in [opj(ds.path, 'not_existing'), path_outside]:
            cml.assert_logged(msg="ignored non-existing paths: {}\n".format(
                              [opj(ds.path, 'not_existing'), path_outside]),
                              regex=False, level='WARNING')
            pass
        ok_(isinstance(result, list))
        eq_(len(result), 2)
        for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
            assert_in(sub, result)
            ok_(sub.is_installed())

    # the return of get is always a list, even if just one thing was gotten;
    # in this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')
Example #21
def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    from datalad.api import create
    from datalad.api import install
    ds = create(t)
    clone = install(t2, source=t)

    # the first one could just know about itself, or an explicitly provided url
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    sm_httpurls = [httpurl, httpurl + '/.git']
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise we really have no clue where to get it from

    # but if we work on the clone, then it should also add urls deduced from
    # its own location's default remote for the current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise we really have no clue where to get it from
    # TODO: check that http:// urls for the dataset itself get resolved

    # TODO: many more!!
Example #22
def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    ds = create(t)
    clone = install(
        t2, source=t,
        result_xfm='datasets', return_type='item-or-list')

    # the first one could just know about itself, or an explicitly provided url
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    # Expansion with '/.git' no longer done in this helper
    #sm_httpurls = [httpurl, httpurl + '/.git']
    sm_httpurls = [httpurl]
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise we really have no clue where to get it from

    # but if we work on the clone, then it should also add urls deduced from
    # its own location's default remote for the current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise we really have no clue where to get it from
Example #23
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')

    ok_(subds.is_installed())
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious case: install an annex file within a not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))
    # we shouldn't be able to silently ignore an attempt to provide a source
    # while "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'), source="http://bogusbogus")
Example #24
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset', path=source.path)

    # if however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset', action='publish')
Example #25
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #26
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')

        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds.path, result_xfm='datasets')
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
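The distinction this test exercises: drop removes annexed file content but keeps the repository installed, while uninstall deinits the subdataset yet keeps both its registration and its mountpoint. A sketch with a placeholder path:

from datalad.api import Dataset

ds = Dataset('/tmp/myclone')  # placeholder: superdataset with installed subs
sub = ds.subdatasets(result_xfm='datasets')[0]
ds.drop(path=sub.path)        # drop annexed content; repository stays
ds.uninstall(path=sub.path)   # deinit; registration and mountpoint remain
assert not sub.is_installed()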
Example #27
def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))
Example #28
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(all_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #29
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #30
def test_our_metadataset_search(tdir):
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    ds = install(path=tdir, source="///")
    assert list(ds.search('.', report='*', regex=True))
    assert list(ds.search('.', report='*'))
    assert list(ds.search('.', report_matched=True))

    # alternatively, we could provide the output in different formats
    import simplejson
    from datalad.utils import swallow_outputs
    from datalad.api import search_
    with swallow_outputs() as cmo:
        assert list(search_('.', report='*', regex=True, format='json', dataset=ds))
        out = cmo.out
    # since this one just absorbs all output first, we can't go one by one
    assert simplejson.loads(out)

    try:
        import yaml
    except ImportError:
        raise SkipTest("no yaml module")
    with swallow_outputs() as cmo:
        assert list(search_('.', report='*', regex=True, format='yaml', dataset=ds))
        out = cmo.out
    assert yaml.load(out)
Example #31
def test_get_multiple_files(path, url, ds_dir):
    from os import listdir
    from datalad.support.network import RI

    file_list = [f for f in listdir(path) if not f.startswith('.')]

    # prepare urls:
    [RI(url + f) for f in file_list]

    # prepare origin
    origin = Dataset(path).create(force=True)
    origin.save(file_list, message="initial")

    ds = install(ds_dir,
                 source=path,
                 result_xfm='datasets',
                 return_type='item-or-list')

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    # get two plus an invalid one:
    result = ds.get(['file1.txt', 'file2.txt', 'not_existing.txt'],
                    on_failure='ignore')
    assert_status('impossible', [result[0]])
    assert_status(['ok', 'notneeded'], result[1:])
    # the explicitly given non-existing file was skipped:
    # (see test_get_invalid_call)
    eq_(set([basename(item.get('path')) for item in result[1:]]),
        {'file1.txt', 'file2.txt'})
    ok_(all(ds.repo.file_has_content(['file1.txt', 'file2.txt'])))

    # get all of them:
    result = ds.get(curdir)
    # there were two files left to get:
    eq_(
        set([
            basename(item.get('path')) for item in result
            if item['type'] == 'file'
        ]), {'file3.txt', 'file4.txt'})
    ok_(all(ds.repo.file_has_content(file_list)))
Example #32
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.save()

    ds = install(c_path,
                 source=o_path,
                 result_xfm='datasets',
                 return_type='item-or-list')

    file_list = [
        'file1.txt',
        opj('subdir', 'file2.txt'),
        opj('subdir', 'subsubdir', 'file3.txt'),
        opj('subdir', 'subsubdir', 'file4.txt')
    ]
    files_in_sub = [
        f for f in file_list if f.startswith(with_pathsep('subdir'))
    ]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')
    # check result:
    assert_status('ok', result)
    eq_(
        set([
            item.get('path')[len(ds.path) + 1:] for item in result
            if item['type'] == 'file'
        ]), set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #33
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    res = ds.subdatasets()
    assert_result_count(res, 1)
    assert_result_count(
        res,
        1,
        # essentials
        path=op.join(ds.path, 'dir'),
        gitmodule_url='./dir',
        gitmodule_name='dir',
        # but also the branch, by default
        gitmodule_branch='master',
    )
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #34
def test_install_recursive_with_data(src, path):

    # now again; with data:
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # installed a dataset and two subdatasets, and one file with content in
    # each
    assert_result_count(res, 3, type='dataset', action='install')
    assert_result_count(res, 3, type='file', action='get')
    # we recurse top down during installation, so the toplevel should appear
    # at the first position in the returned list
    eq_(res[0]['path'], path)
    top_ds = YieldDatasets()(res[0])
    ok_(top_ds.is_installed())
    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all(top_ds.repo.file_has_content(top_ds.repo.get_annexed_files())))
    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds,))
        if isinstance(subds.repo, AnnexRepo):
            ok_(all(subds.repo.file_has_content(subds.repo.get_annexed_files())))
Example #35
def check_datasets_datalad_org(suffix, tdir):
    # Test that git annex / datalad install, get work correctly on our datasets.datalad.org
    # Apparently things can break, especially with introduction of the
    # smart HTTP backend for apache2 etc
    ds = install(tdir, source='///dicoms/dartmouth-phantoms/bids_test6-PD+T2w' + suffix)
    eq_(ds.config.get('remote.origin.annex-ignore', None), None)
    # assert_result_count and not just assert_status since for some reason on
    # Windows we get two records due to a duplicate attempt (as res[1]) to get it
    # again, which is reported as "notneeded".  For the purpose of this test
    # it doesn't make a difference.
    # the git-annex version is not "real", but that is about when the fix was introduced
    from datalad import cfg
    if on_windows \
        and cfg.obtain("datalad.repo.version") < 6 \
        and external_versions['cmd:annex'] <= '7.20181203':
        raise SkipTest("Known to fail, needs fixed git-annex")
    assert_result_count(
        ds.get(op.join('001-anat-scout_ses-{date}', '000001.dcm')),
        1,
        status='ok')
    assert_status('ok', ds.remove())
Example #36
def test_update_unborn_master(path):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available.  Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH, symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible", ds_b.update(merge=True, on_failure="ignore"))

    ds_b.repo.checkout("other")
    assert_status("ok", ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())
Example #37
def test_get_in_unavailable_subdataset(src, path):
    _make_dataset_hierarchy(src)
    root = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    targetpath = opj('sub1', 'sub2')
    targetabspath = opj(root.path, targetpath)
    with chpwd(path):
        res = get(targetabspath)
    assert_result_count(res, 2, status='ok', action='install', type='dataset')
    # dry-fit result filter that only returns the result that matched the requested
    # path
    filtered = [r for r in res if only_matching_paths(r, path=targetabspath)]
    assert_result_count(
        filtered, 1, status='ok', action='install', type='dataset',
        path=targetabspath)
    # we got the dataset, and its immediate content, but nothing below
    sub2 = Dataset(targetabspath)
    ok_(sub2.is_installed())
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not Dataset(opj(targetabspath, 'sub3')).is_installed())
Example #38
def test_install_recursive_with_data(src, path):

    # now again; with data:
    ds_list = install(path, source=src, recursive=True, get_data=True)
    # installed a dataset and two subdatasets, and two files:
    eq_(len(ds_list), 5)
    eq_(sum([isinstance(i, Dataset) for i in ds_list]), 3)
    # we recurse top down during installation, so the toplevel should appear
    # at the first position in the returned list
    eq_(ds_list[0].path, path)
    top_ds = ds_list[0]
    ok_(top_ds.is_installed())
    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all(top_ds.repo.file_has_content(top_ds.repo.get_annexed_files())))
    for sub in top_ds.get_subdatasets(recursive=True):
        subds = Dataset(opj(path, sub))
        ok_(subds.is_installed(), "Not installed: %s" % opj(path, sub))
        if isinstance(subds.repo, AnnexRepo):
            ok_(
                all(subds.repo.file_has_content(
                    subds.repo.get_annexed_files())))
Example #39
def check_get_subdataset_inherit_reckless(override, path):
    src = Dataset(opj(path, "a")).create()
    src_subds = src.create("sub")
    src_subds.create("subsub")
    src.save(recursive=True)

    clone = install(opj(path, "b"), source=src, reckless="auto",
                    result_xfm="datasets", return_type="item-or-list")
    clone_sub = Dataset(clone.pathobj / "sub")
    assert_false(clone_sub.is_installed())
    clone_subsub = Dataset(clone.pathobj / "sub" / "subsub")

    clone.get(opj("sub", "subsub"), reckless=False if override else None)
    ok_(clone_sub.is_installed())
    ok_(clone_subsub.is_installed())

    for sub in [clone_sub, clone_subsub]:
        eq_(sub.config.get("datalad.clone.reckless", None),
            None if override else "auto")
        eq_(sub.config.get("annex.hardlink", None),
            None if override else "true")
Example #40
def test_get_install_missing_subdataset(src, path):

    ds = install(path=path, source=src)
    ds.create(force=True)  # force, to cause dataset initialization
    subs = [Dataset(s_path) for s_path in ds.get_subdatasets(absolute=True)]
    ok_(all([not sub.is_installed() for sub in subs]))

    # we don't install anything, if no explicitly given path points into a
    # not yet installed subdataset:
    ds.get(curdir)
    ok_(all([not sub.is_installed() for sub in subs]))

    # but we do, whenever a given path is contained in such a subdataset:
    file_ = opj(subs[0].path, 'test-annex.dat')
    ds.get(file_)
    ok_(subs[0].is_installed())
    ok_(subs[0].repo.file_has_content('test-annex.dat') is True)

    # but we fulfill any handles, and dataset handles too
    ds.get(curdir, recursive=True)
    ok_(all([sub.is_installed() for sub in subs]))
Example #41
def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset

    target = Dataset(target_path)
    ok_(not target.is_installed())  # we didn't create it, due to the 'since' filter
    eq_(['brandnew'], os.listdir(target_path))

    # now test the functionality when we add a subdataset that itself contains a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())

    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
Example #42
def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` is used on an adjusted branch, but this error
        # depends on `git merge TARGET` being used.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the remote
    # and local version, but the parent dataset does not have a conflict
    # because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res, action="merge", status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok",
                      path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(ds_clone_sub_conflict.repo.call_git(
        ["ls-files", "--unmerged", "--", "foo"]).strip())
Example #43
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(path=['subm 1', 'not_existing', path_outside, '2'],
                        get_data=False,
                        on_failure='ignore',
                        result_xfm=None,
                        return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible/error' status for both invalid args
    # but all the other tasks have been accomplished
    assert_result_count(result,
                        1,
                        status='impossible',
                        message="path does not exist",
                        path=opj(ds.path, 'not_existing'))
    assert_result_count(result,
                        1,
                        status='error',
                        message=("path not associated with dataset %s", ds),
                        path=path_outside)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(result,
                            1,
                            status='ok',
                            message=('Installed subdataset in order to get %s',
                                     sub.path))
        ok_(sub.is_installed())

    # the return of get is always a list by default, even if just one thing
    # was gotten; in this case 'subm 1' was already obtained above, so this
    # will get the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
Example #44
def test_get_autoresolve_recurse_subdatasets(src, path):

    origin = Dataset(src).create()
    origin_sub = origin.create('sub')
    origin_subsub = origin_sub.create('subsub')
    with open(opj(origin_subsub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('.', recursive=True)

    ds = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    eq_(len(ds.subdatasets(fulfilled=True)), 0)

    results = get(opj(ds.path, 'sub'), recursive=True, result_xfm='datasets')
    eq_(len(ds.subdatasets(fulfilled=True, recursive=True)), 2)
    subsub = Dataset(opj(ds.path, 'sub', 'subsub'))
    ok_(subsub.is_installed())
    assert_in(subsub, results)
    # all file handles are fulfilled by default
    ok_(Dataset(opj(ds.path, 'sub', 'subsub')).repo.file_has_content(
        "file_in_annex.txt") is True)
Example #45
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True)
        # toplevel dataset was in the house already
        assert_not_in(ds, result)
        assert_in(sub2, result)
        assert_not_in(sub1, result)
        cml.assert_logged(
            msg=
            "Target {} already exists and is not an installed dataset. Skipped."
            .format(sub1.path),
            regex=False,
            level='WARNING')
Example #46
def test_update_fetch_failure(path):
    path = Path(path)

    ds_a = Dataset(path / "ds_a").create()
    s1 = ds_a.create("s1")
    ds_a.create("s2")

    ds_b = install(source=ds_a.path, path=str(path / "ds-b"), recursive=True)

    # Rename s1 to make fetch fail.
    s1.pathobj.rename(s1.pathobj.parent / "s3")

    res = ds_b.update(recursive=True, on_failure="ignore")
    assert_in_results(res,
                      status="error",
                      path=str(ds_b.pathobj / "s1"),
                      action="update")
    assert_in_results(res,
                      status="ok",
                      path=str(ds_b.pathobj / "s2"),
                      action="update")
    assert_in_results(res, status="ok", path=ds_b.path, action="update")
Example #47
def test_update_follow_parentds_lazy_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_src_sub = ds_src.create("sub")
    ds_src_sub.repo.checkout(DEFAULT_BRANCH, options=["-bother"])
    (ds_src_sub.pathobj / "foo").write_text("on other branch")
    ds_src_sub.save()
    ds_src_sub.repo.checkout(DEFAULT_BRANCH)
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_src_sub.repo.checkout("other")
    ds_src.save(recursive=True)

    with patch("datalad.support.gitrepo.GitRepo.fetch") as fetch_cmd:
        ds_clone.update(follow="parentds",
                        merge="ff-only",
                        recursive=True,
                        on_failure="ignore")
        eq_(fetch_cmd.call_count, 2)

    # With parentds-lazy, an unneeded fetch call in the subdataset is dropped.
    with patch("datalad.support.gitrepo.GitRepo.fetch") as fetch_cmd:
        ds_clone.update(follow="parentds-lazy",
                        merge="ff-only",
                        recursive=True,
                        on_failure="ignore")
        eq_(fetch_cmd.call_count, 1)

    if not ds_clone.repo.is_managed_branch():
        # Now the real thing.
        ds_clone.update(follow="parentds-lazy",
                        merge="ff-only",
                        recursive=True)
        ok_(op.lexists(str(ds_clone.pathobj / "sub" / "foo")))
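A sketch of the follow modes compared above, with a placeholder path: both bring subdatasets to the state registered in the (updated) superdataset, but 'parentds-lazy' skips the subdataset fetch when the recorded state is already available locally:

from datalad.api import Dataset

clone = Dataset('/tmp/clone')  # placeholder: recursive clone of a hierarchy
clone.update(follow='parentds-lazy', merge='ff-only', recursive=True)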
Example #48
def test_get_subdataset_direct_fetch(path):
    path = Path(path)
    origin = Dataset(path / "origin").create()
    if origin.repo.is_managed_branch():
        # The setup here probably breaks down with adjusted branches.
        raise SkipTest("Test assumes non-adjusted branches")

    for sub in ["s0", "s1"]:
        sds = origin.create(origin.pathobj / sub)
        sds.repo.commit(msg="another commit", options=["--allow-empty"])
    origin.save()
    s0 = Dataset(origin.pathobj / "s0")
    s1 = Dataset(origin.pathobj / "s1")
    # Abandon the recorded commit so that it needs to be brought down by a
    # direct fetch.
    s0.repo.call_git(["reset", "--hard", "HEAD~"])
    s1.repo.call_git(["reset", "--hard", "HEAD~"])

    # Tweak the configuration of s0 to make the direct fetch fail.
    # Disallow direct oid fetch (default).
    s0.repo.config.set("uploadpack.allowAnySHA1InWant", "false", where="local")
    # Configure the fetcher to avoid v2, which allows fetching unadvertised
    # objects regardless of the value of uploadpack.allowAnySHA1InWant.
    s0.repo.config.set("protocol.version", "0", where="local")

    # Configure s1 to succeed with direct fetch.
    s1.repo.config.set("uploadpack.allowAnySHA1InWant", "true", where="local")

    clone = install(str(path / "clone"),
                    source="ssh://datalad-test:" +
                    origin.repo.pathobj.as_posix())

    res = clone.get(["s0", "s1"], on_failure="ignore")
    assert_result_count(res,
                        1,
                        action="install",
                        type="dataset",
                        status="error")
    assert_result_count(res, 1, action="install", type="dataset", status="ok")
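
Both knobs tweaked above are plain Git configuration, not DataLad-specific: under wire protocol v0/v1 a fetch of an unadvertised commit id is honored only if the serving repository sets uploadpack.allowAnySHA1InWant, whereas protocol v2 honors such fetches unconditionally. A minimal sketch of the permissive setup, assuming a repo object with the same config.set API used in the test:

# allow direct-by-oid fetches even under protocol v0/v1
repo.config.set("uploadpack.allowAnySHA1InWant", "true", where="local")
# alternatively, force protocol v2, which permits them regardless
repo.config.set("protocol.version", "2", where="local")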
Example #49
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')

        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
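
This test leans on the distinction between drop and uninstall: drop discards annexed file content but leaves the subdataset repository in place, while uninstall deinitializes the subdataset yet keeps its registration (and mountpoint) in the superdataset. A minimal sketch of the full round trip, with "sub" standing in for any registered subdataset path:

ds.drop(path="sub")            # content gone, repository still present
ds.uninstall(path="sub")       # repository gone, registration remains
ds.get("sub", get_data=False)  # reinstall the subdataset, still without content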
Example #50
def test_install_subds_from_another_remote(topdir):
    # https://github.com/datalad/datalad/issues/1905
    with chpwd(topdir):
        origin_ = 'origin'
        clone1_ = 'clone1'
        clone2_ = 'clone2'

        origin = create(origin_, no_annex=True)
        clone1 = install(source=origin, path=clone1_)
        # print("Initial clone")
        clone1.create_sibling('ssh://localhost%s/%s' % (getpwd(), clone2_), name=clone2_)

        # print("Creating clone2")
        clone1.publish(to=clone2_)
        clone2 = Dataset(clone2_)
        # print("Initiating subdataset")
        clone2.create('subds1')

        # print("Updating")
        clone1.update(merge=True, sibling=clone2_)
        # print("Installing within updated dataset -- should be able to install from clone2")
        clone1.install('subds1')
Example #51
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.add(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.add('.')
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.add('.')
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
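
Put differently, reobtain_data=True makes update() re-fetch content only for files whose content was locally present before the update; never-obtained files stay empty. A minimal sketch of the equivalent manual sequence, assuming the same ds as above:

had_content = [f for f in ds.repo.get_annexed_files()
               if ds.repo.file_has_content(f)]
ds.update(merge=True)
ds.get(had_content)  # re-obtain only what was present before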
Example #52
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)[0]
    ok_(ds.is_installed())
    for subds_path in ds.get_subdatasets():
        subds = Dataset(opj(ds.path, subds_path))
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # uninstall data of subds:
        assert_raises(ValueError, ds.uninstall, path=subds_path)
        res = ds.uninstall(path=subds_path, recursive=True)
        ok_(all([f in res for f in annexed_files]))
        ok_(all([not i for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds_path, ds.get_subdatasets())

    for subds_path in ds.get_subdatasets():
        # uninstall subds itself:
        assert_raises(ValueError,
                      ds.uninstall,
                      path=subds_path,
                      remove_handles=True,
                      remove_history=True)
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds_path,
                           remove_handles=True,
                           remove_history=True,
                           recursive=True)
        subds = Dataset(opj(ds.path, subds_path))
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        ok_(not exists(subds.path))
Example #53
def test_install_branch(path=None):
    path = Path(path)
    ds_a = create(path / "ds_a")
    ds_a.create("sub")

    repo_a = ds_a.repo
    repo_a.commit(msg="c1", options=["--allow-empty"])
    repo_a.checkout(DEFAULT_BRANCH + "-other", ["-b"])
    repo_a.commit(msg="c2", options=["--allow-empty"])
    repo_a.checkout(DEFAULT_BRANCH)

    ds_b = install(source=ds_a.path,
                   path=str(path / "ds_b"),
                   branch=DEFAULT_BRANCH + "-other",
                   recursive=True)

    repo_b = ds_b.repo
    eq_(repo_b.get_corresponding_branch() or repo_b.get_active_branch(),
        DEFAULT_BRANCH + "-other")

    repo_sub = Dataset(ds_b.pathobj / "sub").repo
    eq_(repo_sub.get_corresponding_branch() or repo_sub.get_active_branch(),
        DEFAULT_BRANCH)
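
A note on branch=: as the assertions above show, it only affects the top-level clone, while recursively installed subdatasets come out on their recorded default branch. A minimal sketch of the call shape (the source URL is hypothetical):

ds = install(source="https://example.com/ds_a",  # hypothetical URL
             path="ds_b",
             branch="maint",
             recursive=True)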
Example #54
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(
            result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install (it is processed after the '2' submodule)
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
Example #55
def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir,
                 dataset=path,
                 recursive=True,
                 recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))
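
recursion_limit bounds how deep get/install descend: in the hierarchy above, recursion_limit=1 reaches 'sub 1' and 'sub 2' but not 'sub 1/subsub', and already-installed levels are reported rather than reobtained. A minimal sketch of deepening an installation one level at a time, under the same API assumptions:

for depth in (1, 2):
    get(os.curdir, dataset=path, recursive=True,
        recursion_limit=depth, get_data=False)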
Example #56
def test_get_multiple_files(path, url, ds_dir):
    from os import listdir
    from datalad.support.network import RI

    file_list = [f for f in listdir(path) if not f.startswith('.')]

    # prepare urls:
    urls = [RI(url + f) for f in file_list]

    # prepare origin
    origin = Dataset(path).create(force=True)
    origin.add(file_list)
    origin.save("initial")

    ds = install(ds_dir, source=path)

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    # get two plus an invalid one:
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(['file1.txt', 'file2.txt', 'not_existing.txt'])
    result = cme.exception.results
    # explicitly given not existing file was skipped:
    # (see test_get_invalid_call)
    eq_(set([item.get('file') for item in result]),
        {'file1.txt', 'file2.txt'})
    ok_(all([x['success'] is True
             for x in result if x['file'] in ['file1.txt', 'file2.txt']]))
    ok_(all(ds.repo.file_has_content(['file1.txt', 'file2.txt'])))

    # get all of them:
    result = ds.get(curdir)
    # there were two files left to get:
    eq_(set([item.get('file') for item in result]),
        {'file3.txt', 'file4.txt'})
    ok_(all(ds.repo.file_has_content(file_list)))
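
The IncompleteResultsError raised above still carries the per-file result records, so partial successes can be salvaged after the fact. A minimal sketch, assuming the same .results structure the test inspects and the usual datalad.support.exceptions import location:

from datalad.support.exceptions import IncompleteResultsError

try:
    ds.get(['file1.txt', 'not_existing.txt'])
except IncompleteResultsError as e:
    fetched = [r['file'] for r in e.results if r.get('success')]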
Example #57
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)[0]
    ok_(ds.is_installed())
    known_subdss = ds.get_subdatasets()
    for subds_path in ds.get_subdatasets():
        subds = Dataset(opj(ds.path, subds_path))
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds_path)

        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds_path, ds.get_subdatasets())

    eq_(ds.get_subdatasets(), known_subdss)

    for subds_path in ds.get_subdatasets():
        # uninstall subds itself:
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds_path)
        subds = Dataset(opj(ds.path, subds_path))
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.get_subdatasets(), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
Example #58
def test_merge_follow_parentds_subdataset_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    on_adjusted = ds_src.repo.is_managed_branch()
    ds_src_subds = ds_src.create("subds")
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")

    ds_src_subds.repo.call_git(["checkout", "-b", "other"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True,
                          follow="parentds",
                          recursive=True,
                          on_failure="ignore")
    if on_adjusted:
        # Our git-annex-sync-based approach on adjusted branches is
        # incompatible with follow='parentds'.
        assert_in_results(res, action="update", status="impossible")
        return
    else:
        assert_in_results(res, action="update", status="ok")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
    ok_(ds_clone_subds.repo.is_under_annex("foo"))

    (ds_src_subds.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)
    ds_clone_subds.repo.checkout(DEFAULT_BRANCH, options=["-b", "new"])
    ds_clone.update(merge=True, follow="parentds", recursive=True)
    if not on_adjusted:
        eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
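
Because follow='parentds' comes back as 'impossible' on adjusted (managed) branches, a caller can pick the follow mode from the repository state up front ('sibling' is the default mode). A minimal sketch reusing the is_managed_branch() check from the test:

follow = "sibling" if ds_clone.repo.is_managed_branch() else "parentds"
ds_clone.update(merge=True, follow=follow, recursive=True)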
Example #59
def test_gh3356(src, path):
    # create toy version of gh-3356 scenario
    origin = Dataset(src).create()
    origin_sub = origin.create(origin.pathobj / 'subdir' / 'subds')
    for p in (
            (origin_sub.pathobj / 'data' / 'file_in_annex.txt'),
            (origin_sub.pathobj / 'data' / 'file_in_annex2.txt')):
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(p.name)
    origin.save(recursive=True)
    clone = install(
        path, source=src, result_xfm='datasets', return_type='item-or-list')
    targetpaths = [
        opj('subdir', 'subds', 'data', 'file_in_annex.txt'),
        opj('subdir', 'subds', 'data', 'file_in_annex2.txt'),
    ]
    with chpwd(path):
        res = get(targetpaths)
    # get() must report success on two files
    assert_result_count(res, 2, action='get', type='file', status='ok')
    # status must report content for two files
    assert_result_count(
        clone.status(recursive=True, annex='all', report_filetype='eval'), 2,
        action='status', has_content=True)
Example #60
def test_report(path, orig):
    origds, subds = make_ds_hierarchy_with_metadata(orig)
    # now clone to a new place to ensure no content is present
    ds = install(source=origds.path, path=path)
    # only dataset-global metadata
    res = meta_extract(dataset=ds, process_type='dataset')
    assert_result_count(res, 1)
    core_dsmeta = _get_dsmeta_from_core_metadata(
        res[0]['metadata']['metalad_core'])
    assert_in(
        {
            '@type': 'Dataset',
            '@id': 'datalad:{}'.format(subds.repo.get_hexsha()),
            'identifier': 'datalad:{}'.format(subds.id),
            'name': 'sub'
        }, core_dsmeta['hasPart'])
    # has not seen the content
    assert_not_in('contentbytesize', core_dsmeta)
    res = meta_extract(dataset=ds, process_type='content')
    assert (any(
        dict(tag=['one', 'two']) == r['metadata'].get('metalad_annex', None)
        for r in res))
    # we have a report on file(s)
    assert (len(res) > 0)
    # but no subdataset reports
    assert_result_count(res, 0, type='dataset')
    content_size = sum(
        (_get_dsmeta_from_core_metadata(r['metadata']['metalad_core'])
         if r['type'] == 'dataset' else r['metadata']['metalad_core']
         )['contentbytesize'] for r in res)
    # and now all together
    res = meta_extract(dataset=ds, process_type='all')
    # got a content size report that sums up all individual sizes
    eq_((_get_dsmeta_from_core_metadata(res[0]['metadata']['metalad_core'])
         if res[0]['type'] == 'dataset' else
         res[0]['metadata']['metalad_core'])['contentbytesize'], content_size)