Example #1
def test_GitRepo_pull(test_path, orig_path, clone_path):

    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")
    clone.pull()
    ok_(op.exists(op.join(clone_path, filename)))

    # While at it, let's test _get_remotes_having_commit a bit
    clone.add_remote("very_origin", test_path)
    clone.fetch("very_origin")
    eq_(
        clone._get_remotes_having_commit(clone.get_hexsha()),
        ['origin']
    )
    prev_commit = clone.get_hexsha('HEAD^')
    eq_(
        set(clone._get_remotes_having_commit(prev_commit)),
        {'origin', 'very_origin'}
    )
Example #2
def test_GitRepo_fetch(test_path, orig_path, clone_path):

    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without a URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)
Example #3
def test_knows_annex(here, there):
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo
    GitRepo(path=here, create=True)
    assert_false(knows_annex(here))
    AnnexRepo(path=here, create=True)
    assert_true(knows_annex(here))
    GitRepo.clone(path=there, url=here, create=True)
    assert_true(knows_annex(there))
Example #4
def test_GitRepo_add(src, path):

    gr = GitRepo(path, src)
    filename = "test_git_add.dat"
    with open(os.path.join(path, filename), 'w') as f:
        f.write("File to add to git")
    gr.git_add(filename)

    assert_in(filename, gr.get_indexed_files(), "%s not successfully added to %s" % (filename, path))
Example #5
def _clone_from_any_source(sources, dest):
    # should not be the case, but we need to distinguish between a failure
    # of git-clone due to an existing target and an unsuccessful clone
    # attempt. See below.
    existed = dest and exists(dest)
    for source_ in sources:
        try:
            lgr.debug("Retrieving a dataset from URL: "
                      "{0}".format(source_))
            with swallow_logs():
                GitRepo.clone(path=dest, url=source_, create=True)
            return source_  # do not bother with other sources if succeeded
        except GitCommandError as e:
            lgr.debug("Failed to retrieve from URL: "
                      "{0}".format(source_))
            if not existed and dest \
                    and exists(dest):
                lgr.debug("Wiping out unsuccessful clone attempt at "
                          "{}".format(dest))
                rmtree(dest)

            if source_ == sources[-1]:
                # Note: The following block is evaluated whenever we
                # fail even with the last try. Not nice, but currently
                # necessary until we get a more precise exception:
                ####################################
                # TODO: We may want to introduce a --force option to
                # overwrite the target.
                # TODO: Currently assuming that if `existed` and there is a
                # GitCommandError, these two things are connected.
                # Need newer GitPython to get stderr from GitCommandError
                # (already fixed within GitPython.)
                if existed:
                    # rudimentary check for an installed dataset at target:
                    # (TODO: eventually check for being the one, that this
                    # is about)
                    dest_ds = Dataset(dest)
                    if dest_ds.is_installed():
                        lgr.info("{0} appears to be installed already."
                                 "".format(dest_ds))
                        break
                    else:
                        lgr.warning("Target {0} already exists and is not "
                                    "an installed dataset. Skipped."
                                    "".format(dest))
                        # Keep original in debug output:
                        lgr.debug("Original failure:{0}"
                                  "{1}".format(linesep, exc_str(e)))
                        return None
                ##################

                # Re-raise if failed even with the last candidate
                lgr.debug("Unable to establish repository instance at "
                          "{0} from {1}"
                          "".format(dest, sources))
                raise
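The fallback logic above reduces to a small pattern: try each candidate URL in
turn, wipe a partially created target after a failed attempt (but never a
pre-existing one), and re-raise only once the last candidate has failed. A
minimal sketch of just that pattern, with the DataLad-specific reporting
stripped out; `sources` and `dest` are placeholders, the clone call mirrors
the one above, and a bare `Exception` stands in for the `GitCommandError`
caught there:

import os
import shutil

from datalad.support.gitrepo import GitRepo

def clone_from_any(sources, dest):
    # remember whether the target existed before any attempt
    existed = os.path.exists(dest)
    for source in sources:
        try:
            GitRepo.clone(path=dest, url=source, create=True)
            return source  # first success wins, skip remaining sources
        except Exception:
            # remove only what this failed attempt created itself
            if not existed and os.path.exists(dest):
                shutil.rmtree(dest)
            if source == sources[-1]:
                raise  # all candidates exhausted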
Example #6
def test_GitRepo_commit(path):

    gr = GitRepo(path)
    filename = "test_git_add.dat"
    with open(os.path.join(path, filename), 'w') as f:
        f.write("File to add to git")

    gr.git_add(filename)
    gr.git_commit("Testing GitRepo.git_commit().")
    ok_clean_git(path, annex=False, untracked=[])
Example #7
def test_GitRepo_dirty(path):

    repo = GitRepo(path, create=True)
    ok_(not repo.dirty)

    # untracked file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(repo.dirty)
    # staged file
    repo.add('file1.txt')
    ok_(repo.dirty)
    # clean again
    repo.commit("file1.txt added")
    ok_(not repo.dirty)
    # modify to be the same
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(not repo.dirty)
    # modified file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('something else')
    ok_(repo.dirty)
    # clean again
    repo.add('file1.txt')
    repo.commit("file1.txt modified")
    ok_(not repo.dirty)
Example #8
def test_GitRepo_get_indexed_files(src, path):

    gr = GitRepo(path, src)
    idx_list = gr.get_indexed_files()

    runner = Runner()
    out = runner(['git', 'ls-files'], cwd=path)
    out_list = out[0].split()

    for item in idx_list:
        assert_in(item, out_list, "%s not found in output of git ls-files in %s" % (item, path))
    for item in out_list:
        assert_in(item, idx_list, "%s not found in output of get_indexed_files in %s" % (item, path))
Example #9
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #10
def test_GitRepo_get_toppath(repo, tempdir, repo2):
    reporeal = op.realpath(repo)
    eq_(GitRepo.get_toppath(repo, follow_up=False), reporeal)
    eq_(GitRepo.get_toppath(repo), repo)
    # Generate some nested directory
    GitRepo(repo2, create=True)
    repo2real = op.realpath(repo2)
    nested = op.join(repo2, "d1", "d2")
    os.makedirs(nested)
    eq_(GitRepo.get_toppath(nested, follow_up=False), repo2real)
    eq_(GitRepo.get_toppath(nested), repo2)
    # and if not under git, should return None
    eq_(GitRepo.get_toppath(tempdir), None)
Example #11
def test_hierarchy(topdir):
    # GH 1178
    from datalad.api import create_test_dataset
    with swallow_logs(), swallow_outputs():
        dss = create_test_dataset(topdir, spec='1/1')

    eq_(len(dss), 3)
    eq_(dss[0], topdir)
    for ids, ds in enumerate(dss):
        ok_clean_git(ds, annex=False)
        # each one should have 2 commits (except the last one): one for the
        # file and another one for the sub-dataset
        repo = GitRepo(ds)
        eq_(len(list(repo.get_branch_commits())), 1 + int(ids<2))
Example #12
def test_GitRepo_push_n_checkout(orig_path, clone_path):

    origin = GitRepo(orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    with open(op.join(clone_path, filename), 'w') as f:
        f.write("New file.")
    clone.add(filename)
    clone.commit("new file added.")
    # TODO: need checkout first:
    clone.push('origin', '+master:new-branch')
    origin.checkout('new-branch')
    ok_(op.exists(op.join(orig_path, filename)))
Example #13
def knows_annex(path):
    """Returns whether at a given path there is information about an annex

    This includes actually present annexes, but also uninitialized ones, or
    even the presence of a remote annex branch.
    """
    from os.path import exists
    if not exists(path):
        lgr.debug("No annex: test path {0} doesn't exist".format(path))
        return False
    from datalad.support.gitrepo import GitRepo
    repo = GitRepo(path, create=False)
    return "origin/git-annex" in repo.git_get_remote_branches() \
           or "git-annex" in repo.git_get_branches()
Example #14
def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #15
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
Example #16
def _discover_subdatasets_recursively(
        discovered, top, trace, recursion_limit):
    # this beast walks the directory tree from a given `top` directory
    # and discovers valid repos that are scattered around, regardless
    # of whether they are already subdatasets or not
    # `trace` must be a list that has at least one element (the base
    # dataset)
    if recursion_limit is not None and len(trace) > recursion_limit:
        return
    if not isdir(top):
        return
    if not op.islink(top) and GitRepo.is_valid_repo(top):
        if top in discovered:
            # this was found already, assume everything beneath it too
            return
        discovered[top] = dict(
            path=top,
            # and its content
            process_content=True,
            type='dataset',
            parentds=trace[-1])
        # new node in the trace down
        trace = trace + [top]
    for path in listdir(top):
        path = opj(top, path)
        if not isdir(path):
            continue
        # next level down
        _discover_subdatasets_recursively(
            discovered, path, trace, recursion_limit)
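Because the walker only ever appends to the `discovered` mapping, driving it
is straightforward: seed the trace with the base dataset and start one walk
per child directory, as the real caller would. A hedged invocation sketch;
the base path is hypothetical, and `opj`/`listdir` are the same helpers the
function itself uses:

discovered = {}
base = '/tmp/some/tree'  # hypothetical base dataset path
for entry in listdir(base):
    _discover_subdatasets_recursively(
        discovered, opj(base, entry), [base], recursion_limit=2)
for path, props in discovered.items():
    print(path, '->', props['parentds'])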
Example #17
def test_GitRepo_get_remote_url(orig_path, path):

    gr = GitRepo.clone(orig_path, path)
    gr.add_remote('github', 'git://github.com/datalad/testrepo--basic--r1')
    eq_(gr.get_remote_url('origin'), orig_path)
    eq_(gr.get_remote_url('github'),
                 'git://github.com/datalad/testrepo--basic--r1')
Example #18
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'sub1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
Example #19
def _parse_git_submodules(ds, paths):
    """All known ones with some properties"""
    if not (ds.pathobj / ".gitmodules").exists():
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    if paths:
        paths = [
            p.relative_to(ds.pathobj)
            for p in paths
            if ds.pathobj == p or ds.pathobj in p.parents]
        if not paths:
            # we had path constraints, but none matched this dataset
            return
    for path, props in iteritems(ds.repo.get_content_info(
            paths=paths,
            ref=None,
            untracked='no',
            eval_file_type=False)):
        if props.get('type', None) != 'dataset':
            continue
        if ds.pathobj != ds.repo.pathobj:
            props['path'] = ds.pathobj / path.relative_to(ds.repo.pathobj)
        else:
            props['path'] = path
        if not path.exists() or not GitRepo.is_valid_repo(text_type(path)):
            props['state'] = 'absent'
        # TODO kill this after some time. We used to do custom things here
        # and gitshasum was called revision. Be nice and duplicate for a bit
        # wipe out when patience is gone
        props['revision'] = props['gitshasum']
        yield props
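The path-constraint filter at the top is a self-contained pathlib idiom: keep
a constraint only if it equals the dataset path or lies beneath it, and rebase
it relative to the dataset. It can be tried in isolation (all paths here are
made up; output shown for a POSIX system):

from pathlib import Path

ds_path = Path('/tmp/ds')
constraints = [Path('/tmp/ds/sub/x'), Path('/tmp/elsewhere/y'), Path('/tmp/ds')]
inside = [
    p.relative_to(ds_path)
    for p in constraints
    if ds_path == p or ds_path in p.parents]
print(inside)  # [PosixPath('sub/x'), PosixPath('.')]; the outside path is dropped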
Example #20
def test_GitRepo_add(src, path):

    gr = GitRepo.clone(src, path)
    filename = get_most_obscure_supported_name()
    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")
    added = gr.add(filename)

    eq_(added, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    # uncommitted:
    ok_(gr.dirty)

    filename = "another.txt"
    with open(op.join(path, filename), 'w') as f:
        f.write("Another file to add to git")

    # include committing:
    added2 = gr.add(filename)
    gr.commit(msg="Add two files.")
    eq_(added2, {'success': True, 'file': filename})

    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    ok_clean_git(path)
Example #21
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
Example #22
def _install_necessary_subdatasets(
        ds, path, reckless, refds_path, description=None):
    """Installs subdatasets of `ds`, that are necessary to obtain in order
    to have access to `path`.

    Gets the subdataset containing `path` regardless of whether or not it was
    already installed. While doing so, installs everything necessary in between
    the uppermost installed one and `path`.

    Note: `ds` itself has to be installed.

    Parameters
    ----------
    ds: Dataset
    path: str
    reckless: bool
    """
    # figuring out what dataset to start with, --contains limits --recursive
    # to visit only subdataset on the trajectory to the target path
    subds_trail = ds.subdatasets(contains=path, recursive=True)
    if not subds_trail:
        # there is not a single known subdataset (installed or not)
        # for this path -- job done
        return
    # otherwise we start with the one deepest down
    cur_subds = subds_trail[-1]

    while not GitRepo.is_valid_repo(cur_subds['path']):
        # install using a helper that gives some flexibility regarding where to
        # get the module from
        try:
            sd = _install_subds_from_flexible_source(
                Dataset(cur_subds['parentds']),
                relpath(cur_subds['path'], start=cur_subds['parentds']),
                cur_subds['gitmodule_url'],
                reckless,
                description=description)
        except Exception as e:
            # skip all of downstairs, if we didn't manage to install subdataset
            yield get_status_dict(
                'install', path=cur_subds['path'], type='dataset',
                status='error', logger=lgr, refds=refds_path,
                message=("Installation of subdatasets %s failed with exception: %s",
                         cur_subds['path'], exc_str(e)))
            return

        # report installation, whether it helped or not
        yield get_status_dict(
            'install', ds=sd, status='ok', logger=lgr, refds=refds_path,
            message=("Installed subdataset in order to get %s", path))

        # now check whether the just installed subds brought us any closer to
        # the target path
        subds_trail = sd.subdatasets(contains=path, recursive=False)
        if not subds_trail:
            # no (newly available) subdataset gets us any closer
            return
        # next round
        cur_subds = subds_trail[-1]
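Since the helper is a generator of result records, a caller simply drains it
and inspects the per-subdataset status. A hedged consumption sketch, assuming
an installed Dataset instance `ds` and a target `path` somewhere deep inside
it:

for res in _install_necessary_subdatasets(
        ds, path, reckless=False, refds_path=ds.path):
    # each record is a status dict as produced by get_status_dict()
    print(res['status'], res.get('path'))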
Example #23
def test_GitRepo_remote_remove(orig_path, path):

    gr = GitRepo.clone(orig_path, path)
    gr.add_remote('github', 'git://github.com/datalad/testrepo--basic--r1')
    gr.remove_remote('github')
    out = gr.get_remotes()
    eq_(len(out), 1)
    assert_in('origin', out)
Example #24
def test_GitRepo_remove(path):

    gr = GitRepo(path, create=True)
    gr.add('*')
    gr.commit("committing all the files")

    eq_(gr.remove('file'), ['file'])
    eq_(set(gr.remove('d', r=True, f=True)), {'d/f1', 'd/f2'})

    eq_(set(gr.remove('*', r=True, f=True)), {'file2', 'd2/f1', 'd2/f2'})
Example #25
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #26
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #27
def test_get_git_dir(path):
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:
        # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)
    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
Example #28
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, work around the detached-HEAD issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
Example #29
def test_install_dataset_from_instance(src, dst):
    origin = Dataset(src)
    clone = install(source=origin, path=dst)

    assert_is_instance(clone, Dataset)
    ok_startswith(clone.path, dst)
    ok_(clone.is_installed())
    ok_(GitRepo.is_valid_repo(clone.path))
    ok_clean_git(clone.path, annex=None)
    assert_in('INFO.txt', clone.repo.get_indexed_files())
Example #30
def test_install_plain_git(src, path):
    # make plain git repo
    gr = GitRepo(src, create=True)
    gr.git_add('test.txt')
    gr.git_commit('demo')
    # now install it somewhere else
    ds = install(path=path, source=src)
    # stays plain Git repo
    ok_(isinstance(ds.repo, GitRepo))
    # now go back to original
    ds = Dataset(src)
    ok_(isinstance(ds.repo, GitRepo))
    # installing a file must fail, as we decided not to perform magical upgrades
    # GitRepo -> AnnexRepo
    assert_raises(RuntimeError, ds.install, path='test2.txt', source=opj(src, 'test2.txt'))
    # but works when forced
    ifiles = ds.install(path='test2.txt', source=opj(src, 'test2.txt'), add_data_to_git=True)
    ok_startswith(ifiles, ds.path)
    ok_(ifiles.endswith('test2.txt'))
    ok_('test2.txt' in ds.repo.get_indexed_files())
Example #31
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does fetch -- the related problem
            # so skipping this early
            raise SkipTest(
                'TODO: Make publish work for flat datasets, it currently breaks'
            )

        remote_name = 'remote-' + str(flat)
        # TODO: there is f.ckup with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(target=remote_name,
                           sshurl="ssh://localhost" + target_path_,
                           target_dir=target_dir_tpl,
                           recursive=True,
                           ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(target_path_,
                             basename(src_path) if flat else "").rstrip(
                                 os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)
Example #32
def func(arg, top, names):
    refpath, ignore, dirs = arg
    legit_names = []
    for n in names:
        path = opj(top, n)
        if not isdir(path) or path in ignore:
            pass
        elif path != refpath and GitRepo.is_valid_repo(path):
            # mount point, keep but don't dive into
            dirs.append(path)
        else:
            legit_names.append(n)
            dirs.append(path)
    names[:] = legit_names
Example #33
def test_ok_file_under_git_symlinks(path=None):
    # Test that ok_file_under_git works correctly under a symlinked path
    orepo = GitRepo(path)
    orepo.add('ingit')
    orepo.commit('msg')
    orepo.add('staged')
    lpath = path + "-symlink"  # will also be removed AFAIK by our tempfile handling
    Path(lpath).symlink_to(Path(path))
    ok_symlink(lpath)
    ok_file_under_git(op.join(path, 'ingit'))
    ok_file_under_git(op.join(lpath, 'ingit'))
    ok_file_under_git(op.join(lpath, 'staged'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'notingit'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'nonexisting'))
Example #34
def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
    ds = require_dataset(dataset, purpose='clean-up')
    res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
    for ap in AnnotatePaths.__call__(
            dataset=ds.path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='clean',
            unavailable_path_status='impossible',
            nondataset_path_status='impossible',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            yield ap
            continue
        if ap.get('type', None) != 'dataset':
            ap.update(status='impossible',
                      message='only datasets can be cleaned')
            yield ap
            continue
        d = ap['path']
        gitdir = GitRepo.get_git_dir(d)
        for dirpath, flag, msg, sing_pl in [
            (ARCHIVES_TEMP_DIR, "cached-archives",
             "temporary archive", ("directory", "directories")),
            (ANNEX_TEMP_DIR, "annex-tmp",
             "temporary annex", ("file", "files")),
            (opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
             "metadata search index", ("file", "files")),
        ]:
            topdir = opj(d, dirpath)
            lgr.debug("Considering to clean %s:%s", d, dirpath)
            if not ((what is None) or (flag in what)):
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory', **res_kwargs)
                continue
            paths = glob(opj(topdir, '*'))
            if not paths:
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory', **res_kwargs)
                continue
            pl = len(paths) > 1
            message = ("Removed %d %s %s: %s",
                       len(paths), msg, sing_pl[int(pl)],
                       ", ".join(sorted([x[len(topdir) + 1:] for x in paths])))
            rmtree(topdir)
            yield get_status_dict(
                path=topdir, status='ok', type='dir', message=message,
                **res_kwargs)
Example #35
def test_GitRepo_instance_from_clone(src, dst):

    gr = GitRepo.clone(src, dst)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    assert_is_instance(gr.repo, gitpy.Repo,
                       "Failed to instantiate GitPython Repo object.")
    ok_(op.exists(op.join(dst, '.git')))

    # doing it again should raise GitCommandError, since git will notice
    # there's already a git repo at that path and therefore can't clone to `dst`
    # Note: Since GitRepo is now a WeakSingletonRepo, this is prevented from
    # happening atm. Disabling for now:
    #    raise SkipTest("Disabled for RF: WeakSingletonRepo")
    with swallow_logs() as logs:
        assert_raises(GitCommandError, GitRepo.clone, src, dst)
Example #36
def test_multiway_merge(path=None):
    # prepare ds with two siblings, but no tracking branch
    ds = Dataset(op.join(path, 'ds_orig')).create()
    r1 = AnnexRepo(path=op.join(path, 'ds_r1'), git_opts={'bare': True})
    r2 = GitRepo(path=op.join(path, 'ds_r2'), git_opts={'bare': True})
    ds.siblings(action='add', name='r1', url=r1.path)
    ds.siblings(action='add', name='r2', url=r2.path)
    assert_status('ok', ds.push(to='r1'))
    # push unlike publish reports on r2 not being an annex remote with a
    # 'notneeded'
    assert_status(('ok', 'notneeded'), ds.push(to='r2'))
    # just a fetch should be no issue
    assert_status('ok', ds.update())
    # ATM we do not support multi-way merges
    assert_status('impossible', ds.update(merge=True, on_failure='ignore'))
Example #37
def test_bare(path):
    # can we handle a bare repo?
    gr = GitRepo(path, create=True, bare=True)
    # do we read the correct local config?
    assert_in(gr.pathobj / 'config', gr.config._stores['git']['files'])
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # it does not report something that was never set
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # now write a value to the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', where='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1], (gr.pathobj / 'config').read_text())
Example #38
def test_crazy_cfg(path):
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True),
                        source='dataset')
    assert_in('crazy.padry', cfg)
    # make sure crazy config is not read when in local mode
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='local')
    assert_not_in('crazy.padry', cfg)
    # it will make it in in 'any' mode though
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='any')
    assert_in('crazy.padry', cfg)
    # typos in the source mode arg will not have silent side-effects
    assert_raises(ValueError,
                  ConfigManager,
                  Dataset(opj(path, 'ds')),
                  source='locale')
Example #39
def knows_annex(path):
    """Returns whether at a given path there is information about an annex

    It is just a thin wrapper around GitRepo.is_with_annex() classmethod
    which also checks for `path` to exist first.

    This includes actually present annexes, but also uninitialized ones, or
    even the presence of a remote annex branch.
    """
    from os.path import exists
    if not exists(path):
        lgr.debug("No annex: test path {0} doesn't exist".format(path))
        return False
    from datalad.support.gitrepo import GitRepo
    return GitRepo(path, init=False, create=False).is_with_annex()
Example #40
def test_GitRepo_commit(path):

    gr = GitRepo(path)
    filename = "test_git_add.dat"
    with open(os.path.join(path, filename), 'w') as f:
        f.write("File to add to git")

    gr.git_add(filename)
    gr.git_commit("Testing GitRepo.git_commit().")
    ok_clean_git(path, annex=False, untracked=[])
Example #41
def test_normalize_path(git_path):

    cwd = os.getcwd()
    gr = GitRepo(git_path)

    # cwd is currently outside the repo, so any relative path
    # should be interpreted as relative to `annex_path`
    assert_raises(FileNotInRepositoryError, _normalize_path, gr.path,
                  os.getcwd())

    result = _normalize_path(gr.path, "testfile")
    assert_equal(result, "testfile", "_normalize_path() returned %s" % result)

    # result = _normalize_path(gr.path, os.path.join('.', 'testfile'))
    # assert_equal(result, "testfile", "_normalize_path() returned %s" % result)
    #
    # result = _normalize_path(gr.path, os.path.join('testdir', '..', 'testfile'))
    # assert_equal(result, "testfile", "_normalize_path() returned %s" % result)
    # Note: By now, normpath within normalize_paths() is disabled, therefore
    # disable these tests.

    result = _normalize_path(gr.path, os.path.join('testdir', 'testfile'))
    assert_equal(result, os.path.join("testdir", "testfile"),
                 "_normalize_path() returned %s" % result)

    result = _normalize_path(gr.path, os.path.join(git_path, "testfile"))
    assert_equal(result, "testfile", "_normalize_path() returned %s" % result)

    # now we are inside, so relative paths are relative to cwd and have
    # to be converted to be relative to annex_path:
    os.chdir(os.path.join(git_path, 'd1', 'd2'))

    result = _normalize_path(gr.path, "testfile")
    assert_equal(result, os.path.join('d1', 'd2', 'testfile'),
                 "_normalize_path() returned %s" % result)

    result = _normalize_path(gr.path, os.path.join('..', 'testfile'))
    assert_equal(result, os.path.join('d1', 'testfile'),
                 "_normalize_path() returned %s" % result)

    assert_raises(FileNotInRepositoryError, _normalize_path, gr.path,
                  os.path.join(git_path, '..', 'outside'))

    result = _normalize_path(gr.path, os.path.join(git_path, 'd1', 'testfile'))
    assert_equal(result, os.path.join('d1', 'testfile'),
                 "_normalize_path() returned %s" % result)

    os.chdir(cwd)
Example #42
def _get_new_vcs(ds, source, vcs):
    if source is None:
        # always come with annex when created from scratch
        lgr.info("Creating a new annex repo at %s", ds.path)
        vcs = AnnexRepo(ds.path, url=source, create=True)
    else:
        # when obtained from remote, try with plain Git
        lgr.info("Creating a new git repo at %s", ds.path)
        vcs = GitRepo(ds.path, url=source, create=True)
        if knows_annex(ds.path):
            # init annex when traces of a remote annex can be detected
            lgr.info("Initializing annex repo at %s", ds.path)
            vcs = AnnexRepo(ds.path, init=True)
        else:
            lgr.debug("New repository clone has no traces of an annex")
    return vcs
Example #43
def test_get_default_title(path):
    repo = GitRepo(path)
    ds = Dataset(path)
    # There is no dataset initialized yet, so only the path will be the title
    dirname = op.basename(path)
    eq_(_get_default_title(ds), dirname)

    # Initialize and get UUID
    ds.create(force=True)
    eq_(_get_default_title(ds), '{dirname}#{ds.id}'.format(**locals()))

    # Tag and get @version
    # cannot use ds.save since our tags are not annotated,
    # see https://github.com/datalad/datalad/issues/4139
    ds.repo.tag("0.1", message="important version")
    eq_(_get_default_title(ds), '{dirname}#{ds.id}@0.1'.format(**locals()))
Example #44
def test_repo_cache(path):
    ds = Dataset(path)
    # none by default
    eq_(ds.repo, None)
    # make Git repo manually
    git = GitRepo(path=path, create=True)
    repo = ds.repo
    # got one
    assert_false(repo is None)
    # stays that one
    assert_true(ds.repo is repo)
    # now turn into an annex
    annex = AnnexRepo(path=path, create=True)
    # repo instance must change
    assert_false(ds.repo is repo)
    assert_true(isinstance(ds.repo, AnnexRepo))
Example #45
def test_hashable(path):
    path = ut.Path(path)
    tryme = set()
    # is it considered hashable at all
    tryme.add(Dataset(path / 'one'))
    eq_(len(tryme), 1)
    # do another one, same class different path
    tryme.add(Dataset(path / 'two'))
    eq_(len(tryme), 2)
    # test whether two different types of repo instances pointing
    # to the same repo on disk are considered different
    Dataset(path).create()
    tryme.add(GitRepo(path))
    eq_(len(tryme), 3)
    tryme.add(AnnexRepo(path))
    eq_(len(tryme), 4)
Example #46
def test_ls_repos(toppath):
    # smoke test pretty much
    GitRepo(toppath + '1', create=True)
    AnnexRepo(toppath + '2', create=True)
    repos = glob(toppath + '*')

    for args in (repos, repos + ["/some/bogus/file"]):
        for recursive in [False, True]:
            # in both cases shouldn't fail
            with swallow_outputs() as cmo:
                ls(args, recursive=recursive)
                assert_equal(len(cmo.out.rstrip().split('\n')), len(args))
                assert_in('[annex]', cmo.out)
                assert_in('[git]', cmo.out)
                assert_in('master', cmo.out)
                if "bogus" in args:
                    assert_in('unknown', cmo.out)
Example #47
def test_GitRepo_get_indexed_files(src, path):

    gr = GitRepo.clone(src, path)
    idx_list = gr.get_indexed_files()

    runner = Runner()
    out = runner(['git', 'ls-files'], cwd=path)
    out_list = list(filter(bool, out[0].split('\n')))

    for item in idx_list:
        assert_in(
            item, out_list,
            "%s not found in output of git ls-files in %s" % (item, path))
    for item in out_list:
        assert_in(
            item, idx_list,
            "%s not found in output of get_indexed_files in %s" % (item, path))
Example #48
def _adj2subtrees(base, adj, subs):
    # given a set of parent-child mapping, compute a mapping of each parent
    # to all its (grand)children of any depth level
    subtrees = dict(adj)
    subs = set(subs)
    # from bottom up
    for ds in sorted(adj, reverse=True):
        subtree = []
        for sub in subtrees[ds]:
            subtree.append(sub)
            subtree.extend(subtrees.get(sub, []))
        subtrees[ds] = subtree
    # give each leaf dataset an entry too
    for sub in subs:
        if sub not in subtrees and GitRepo.is_valid_repo(sub):
            subtrees[sub] = []
    return subtrees
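A small worked example makes the bottom-up accumulation concrete. With plain
strings standing in for dataset paths, and every sub already present in `adj`
(so the GitRepo.is_valid_repo() check for leaves is never consulted), the
reverse-sorted iteration processes children before their parents:

adj = {
    '/ds':        ['/ds/a', '/ds/b'],
    '/ds/a':      ['/ds/a/deep'],
    '/ds/a/deep': [],
    '/ds/b':      [],
}
subtrees = _adj2subtrees('/ds', adj, subs=['/ds/a', '/ds/b', '/ds/a/deep'])
print(subtrees['/ds'])  # ['/ds/a', '/ds/a/deep', '/ds/b'], i.e. all descendants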
Example #49
def test_get_tracking_branch(o_path, c_path):

    clone = GitRepo.clone(o_path, c_path)
    # Note that the name of the checked-out default branch can differ, even
    # though it is conceptually always 'master'. For direct mode annex
    # repositories, for example, it would be "annex/direct/master".
    # Therefore use whatever branch is checked out by default:
    master_branch = clone.get_active_branch()
    ok_(master_branch)

    eq_(('origin', 'refs/heads/' + master_branch), clone.get_tracking_branch())

    clone.checkout('new_branch', ['-b'])

    eq_((None, None), clone.get_tracking_branch())

    eq_(('origin', 'refs/heads/' + master_branch),
        clone.get_tracking_branch(master_branch))
Example #50
def test_GitRepo_count_objects(repo_path):

    repo = GitRepo(repo_path, create=True)
    # test if dictionary returned
    eq_(isinstance(repo.count_objects, dict), True)
    # test if dictionary contains keys and values we expect
    empty_count = {
        'count': 0,
        'garbage': 0,
        'in-pack': 0,
        'packs': 0,
        'prune-packable': 0,
        'size': 0,
        'size-garbage': 0,
        'size-pack': 0
    }
    eq_(empty_count, repo.count_objects)
Example #51
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = Path(src) / 'ds'
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')
Example #52
def test_GitRepo_get_toppath(repo, tempdir, repo2):
    reporeal = op.realpath(repo)
    eq_(GitRepo.get_toppath(repo, follow_up=False), reporeal)
    eq_(GitRepo.get_toppath(repo), repo)
    # Generate some nested directory
    GitRepo(repo2, create=True)
    repo2real = op.realpath(repo2)
    nested = op.join(repo2, "d1", "d2")
    os.makedirs(nested)
    eq_(GitRepo.get_toppath(nested, follow_up=False), repo2real)
    eq_(GitRepo.get_toppath(nested), repo2)
    # and if not under git, should return None
    eq_(GitRepo.get_toppath(tempdir), None)
Example #53
def test_GitRepo_get_files(url, path):

    gr = GitRepo.clone(url, path)

    # get the expected files via os for comparison:
    os_files = set()
    for (dirpath, dirnames, filenames) in os.walk(path):
        rel_dir = os.path.relpath(dirpath, start=path)
        if rel_dir.startswith(".git"):
            continue
        for file_ in filenames:
            file_path = os.path.normpath(op.join(rel_dir, file_))
            os_files.add(file_path)

    # get the files via GitRepo:
    local_files = set(gr.get_files())
    remote_files = set(gr.get_files(branch="origin/master"))

    eq_(local_files, set(gr.get_indexed_files()))
    eq_(local_files, remote_files)
    eq_(local_files, os_files)

    # create a different branch:
    gr.checkout('new_branch', ['-b'])
    filename = 'another_file.dat'
    with open(op.join(path, filename), 'w') as f:
        f.write("something")
    gr.add(filename)
    gr.commit("Added.")

    # now get the files again:
    local_files = set(gr.get_files())
    eq_(local_files, os_files.union({filename}))
    # retrieve remote branch again, which should not have changed:
    remote_files = set(gr.get_files(branch="origin/master"))
    eq_(remote_files, os_files)
    eq_(set([filename]), local_files.difference(remote_files))

    # switch back and query non-active branch:
    gr.checkout('master')
    local_files = set(gr.get_files())
    branch_files = set(gr.get_files(branch="new_branch"))
    eq_(set([filename]), branch_files.difference(local_files))
Example #54
def test_subdataset_add_file_end_to_end(file_name):

    test_path = "d_1/d_1.0/f_1.0.0"

    json.dump({
        **metadata_template,
        **additional_keys_template,
        "type": "file",
        "path": test_path
    }, open(file_name, "tw"))

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify dataset level metadata was added
        root_dataset_id = UUID(additional_keys_template["root_dataset_id"])
        root_dataset_version = additional_keys_template["root_dataset_version"]
        dataset_tree_path = MetadataPath(
            additional_keys_template["dataset_path"])

        tree_version_list, uuid_set, mrr = _get_top_nodes(
            git_repo,
            root_dataset_id,
            root_dataset_version)

        _, dataset_tree = tree_version_list.get_dataset_tree(
            root_dataset_version)

        mrr = dataset_tree.get_metadata_root_record(dataset_tree_path)
        eq_(mrr.dataset_identifier, UUID(metadata_template["dataset_id"]))

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
Example #55
def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #56
def test_get_refcommit(path):
    # dataset without a single commit
    ds = Dataset(GitRepo(path, create=True).path)
    eq_(get_refcommit(ds), None)
    # we get a commit via create
    ds.create(force=True)
    # still no metadata-relevant changes
    eq_(get_refcommit(ds), None)
    # place irrelevant file and commit
    create_tree(ds.path, {'.datalad': {'ignored': 'content'}})
    ds.save()
    # no change to the previous run, irrelevant changes are ignored
    eq_(get_refcommit(ds), None)
    # a real change
    create_tree(ds.path, {'real': 'othercontent'})
    ds.save()
    real_change = get_refcommit(ds)
    eq_(real_change, ds.repo.get_hexsha('HEAD'))
    # another irrelevant change, no change in refcommit
    create_tree(ds.path, {'.datalad': {'ignored2': 'morecontent'}})
    ds.save()
    eq_(get_refcommit(ds), real_change)
    # we can pick up deletions
    os.unlink(text_type(ds.pathobj / 'real'))
    ds.save()
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
    # subdataset addition
    subds = ds.create('sub')
    subds_addition = get_refcommit(ds)
    eq_(subds_addition, ds.repo.get_hexsha('HEAD'))
    # another irrelevant change, no change in refcommit, despite subds presence
    create_tree(ds.path, {'.datalad': {'ignored3': 'evenmorecontent'}})
    ds.save()
    eq_(get_refcommit(ds), subds_addition)
    # subdataset modification is a relevant change
    create_tree(subds.path, {'real': 'real'})
    ds.save(recursive=True)
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
    # and subdataset removal
    ds.remove('sub', check=False)
    assert_repo_status(ds.path)
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
Example #57
def require_dataset(dataset, check_installed=True, purpose=None):
    """Helper function to resolve a dataset.

    This function tries to resolve a dataset given an input argument,
    or based on the process' working directory, if `None` is given.

    Parameters
    ----------
    dataset : None or path or Dataset
      Some value identifying a dataset or `None`. In the latter case
      a dataset will be searched based on the process working directory.
    check_installed : bool, optional
      If True, an optional check whether the resolved dataset is
      properly installed will be performed.
    purpose : str, optional
      This string will be inserted in error messages to make them more
      informative. The pattern is "... dataset for <STRING>".

    Returns
    -------
    Dataset
      Or raises an exception (InsufficientArgumentsError).
    """
    if dataset is not None and not isinstance(dataset, Dataset):
        dataset = Dataset(dataset)

    if dataset is None:  # possible scenario of cmdline calls
        dspath = GitRepo.get_toppath(getpwd())
        if not dspath:
            raise NoDatasetArgumentFound("No dataset found")
        dataset = Dataset(dspath)

    assert (dataset is not None)
    lgr.debug("Resolved dataset{0}: {1}".format(
        ' for {}'.format(purpose) if purpose else '', dataset))

    if check_installed and not dataset.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(dataset.path))

    return dataset
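In the `None` branch the dataset is resolved upward from the process working
directory, which is the common command-line scenario. A hedged usage sketch,
run from somewhere inside an installed dataset (the import path assumes
DataLad's module layout at the time of these examples):

from datalad.distribution.dataset import require_dataset

ds = require_dataset(None, check_installed=True, purpose='demonstration')
print(ds.path)  # top of the dataset containing the current directory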
Example #58
def download_dataset(repo=None,
                     remote_path=None,
                     local_folder=None,
                     update_if_exists=False,
                     unlock=False):
    assert HAVE_DATALAD, 'You need to install datalad'

    if repo is None:
        #  print('Use gin NeuralEnsemble/ephy_testing_data')
        repo = 'https://gin.g-node.org/NeuralEnsemble/ephy_testing_data'

    if local_folder is None:
        base_local_folder = get_global_dataset_folder()
        base_local_folder.mkdir(exist_ok=True)
        #  if not is_set_global_dataset_folder():
        #  print(f'Local folder is {base_local_folder}, Use set_global_dataset_folder() to set it globally')
        local_folder = base_local_folder / repo.split('/')[-1]

    if local_folder.exists() and GitRepo.is_valid_repo(local_folder):
        dataset = datalad.api.Dataset(path=local_folder)
        # make sure git repo is in clean state
        repo = dataset.repo
        if update_if_exists:
            repo.call_git(['checkout', '--force', 'master'])
            dataset.update(merge=True)
    else:
        dataset = datalad.api.install(path=local_folder, source=repo)

    if remote_path is None:
        print('Bad boy: you have to provide "remote_path"')
        return

    local_path = local_folder / remote_path

    dataset.get(remote_path)

    # unlocking is necessary for binding volume to containers
    if unlock:
        dataset.unlock(remote_path, recursive=True)

    return local_path
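Typical use lets the function fall back to the default GIN repository, fetch
a single file, and hand back its local path. A hedged usage sketch; the
remote path is hypothetical and must name a file that actually exists in the
dataset:

local_path = download_dataset(
    remote_path='some/subdir/recording.dat',  # hypothetical file in the repo
    update_if_exists=True)
print(local_path)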
Example #59
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.support.network.DATASETS_TOPURL', topurl):
        ds = clone('///ds',
                   path,
                   result_xfm='datasets',
                   return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #60
def test_clone_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = clone(src,
               path,
               description='mydummy',
               result_xfm='datasets',
               return_type='item-or-list')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again shouldn't matter:
    res = clone(src, path)
    assert_result_values_equal(res, 'source_url', [src])
    assert_status('notneeded', res)
    assert_message("dataset %s was already cloned from '%s'", res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)