Example no. 1
def test_running_on_empty(path):
    # empty repo
    repo = AnnexRepo(path, create=True)
    # just wrap with a dataset
    ds = Dataset(path)
    # and run status ... should be good and do nothing
    eq_([], ds.status(result_renderer=None))
Example no. 2
def check_compress_file(ext, annex, path=None, name=None):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive, path=path)
    assert_true(op.exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest() from exc
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')
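
For orientation, a hedged sketch of how this helper might be driven: the nose-style generator below and the extension list are assumptions (the helper's path and name arguments would presumably be filled in by with_tempfile decorators), not code from the suite above.

def test_compress_file():
    # hypothetical driver -- the extension list and the annex on/off sweep
    # are assumptions
    for annex in (False, True):
        for ext in ('.tar.gz', '.zip', '.xz'):
            yield check_compress_file, ext, annex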
Example no. 3
def test_smoke_pipelines(d):
    # Just to verify that we can correctly establish the pipelines
    AnnexRepo(d, create=True)
    with chpwd(d):
        with swallow_logs():
            for p in [pipeline('bogus'), collection_pipeline()]:
                ok_(len(p) > 1)
Example no. 4
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(
        all([
            GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
            for s in top_repo.get_submodules()
        ]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
Example no. 5
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError, unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    assert_raises(NoDatasetArgumentFound, unlock, dataset=None, path=path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    (ds.pathobj / ".noannex").unlink()
    AnnexRepo(path, create=True)

    # One that doesn't exist.
    res = ds.unlock(path="notexistent.txt",
                    result_xfm=None,
                    on_failure='ignore',
                    return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    # And one that isn't associated with a dataset.
    assert_in_results(ds.unlock(path=path2, on_failure="ignore"),
                      status="error",
                      message="path not underneath this dataset")

    chpwd(_cwd)
Example no. 6
def test_add_archive_use_archive_dir(repo_path):
    repo = AnnexRepo(repo_path, create=True)
    with chpwd(repo_path):
        # Let's add first archive to the repo with default setting
        archive_path = opj('4u', '1.tar.gz')
        # check it gives informative error if archive is not already added
        with assert_raises(RuntimeError) as cmr:
            add_archive_content(archive_path)
        assert_re_in(
            "You should run ['\"]datalad add 4u\\\\1\\.tar\\.gz['\"] first"
            if on_windows else
            "You should run ['\"]datalad add 4u/1\\.tar\\.gz['\"] first",
            str(cmr.exception),
            match=False)
        with swallow_outputs():
            repo.add(archive_path)
        repo.commit("added 1.tar.gz")

        ok_archives_caches(repo.path, 0)
        add_archive_content(archive_path,
                            strip_leading_dirs=True,
                            use_current_dir=True)
        ok_(not exists(opj('4u', '1 f.txt')))
        ok_file_under_git(repo.path, '1 f.txt', annexed=True)
        ok_archives_caches(repo.path, 0)

        # and now let's extract under archive dir
        add_archive_content(archive_path, strip_leading_dirs=True)
        ok_file_under_git(repo.path, opj('4u', '1 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)

        add_archive_content(opj('4u', 'sub.tar.gz'))
        ok_file_under_git(repo.path, opj('4u', 'sub', '2 f.txt'), annexed=True)
        ok_archives_caches(repo.path, 0)
Example no. 7
def mk_push_target(ds, name, path, annex=True, bare=True):
    # life could be simple, but nothing is simple on windows
    #src.create_sibling(dst_path, name='target')
    if annex:
        if bare:
            target = GitRepo(path=path, bare=True, create=True)
            # cannot use call_annex() on a plain GitRepo;
            # initialize the annex via a direct git call instead
            target.call_git(['annex', 'init'])
        else:
            target = AnnexRepo(path, init=True, create=True)
            if not target.is_managed_branch():
                # for managed branches more fireworks are needed -- see below
                target.config.set('receive.denyCurrentBranch',
                                  'updateInstead',
                                  where='local')
    else:
        target = GitRepo(path=path, bare=bare, create=True)
    ds.siblings('add', name=name, url=path, result_renderer=None)
    if annex and not bare and target.is_managed_branch():
        # maximum complication
        # the target repo already has a commit that is unrelated
        # to the source repo, because it has built a reference
        # commit for the managed branch.
        # the only sane approach is to let git-annex establish a shared
        # history
        ds.repo.call_annex(['sync'])
        ds.repo.call_annex(['sync', '--cleanup'])
    return target
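
A minimal sketch of how a test might employ this helper; the fixture paths and the final push call are assumptions rather than code taken from the examples above.

def test_push_smoke(src_path, dst_path):
    # src_path/dst_path are hypothetical fixture-provided directories
    ds = Dataset(src_path).create()
    (ds.pathobj / 'file.dat').write_text('payload')
    ds.save()
    mk_push_target(ds, 'target', dst_path, annex=True, bare=True)
    ds.push(to='target')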
Example no. 8
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)
Example no. 9
def test_rotree(d):
    d2 = opj(d, 'd1', 'd2')  # deep nested directory
    f = opj(d2, 'f1')
    os.makedirs(d2)
    with open(f, 'w') as f_:
        f_.write("LOAD")
    with swallow_logs():
        ar = AnnexRepo(d2)
    rotree(d)
    # we shouldn't be able to delete anything UNLESS in a "crippled" situation:
    # running as root, or the filesystem is FAT etc.
    # Theoretically annex should declare FS as crippled when run as root, but
    # see http://git-annex.branchable.com/bugs/decides_that_FS_is_crippled_
    # under_cowbuilder___40__symlinks_supported_etc__41__/#comment-60c3cbe2710d6865fb9b7d6e247cd7aa
    # so explicit 'or'
    if not (ar.is_crippled_fs() or (os.getuid() == 0)):
        assert_raises(OSError, os.unlink, f)  # OK to use os.unlink
        assert_raises(OSError, unlink, f)  # and even with waiting and trying!
        assert_raises(OSError, shutil.rmtree, d)
        # but file should still be accessible
        with open(f) as f_:
            eq_(f_.read(), "LOAD")
    # make it RW
    rotree(d, False)
    unlink(f)
    shutil.rmtree(d)
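
For context, a sketch of what rotree() plausibly does, inferred from the assertions above (strip or restore write permission across a whole tree); this is an illustration, not the actual helper.

import os
import stat

WRITE_BITS = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH

def rotree_sketch(path, read_only=True):
    # every directory is eventually yielded as `root`, so directories and
    # files alike get their write bits adjusted
    for root, dirs, files in os.walk(path, topdown=False):
        for p in [os.path.join(root, f) for f in files] + [root]:
            mode = os.stat(p).st_mode
            os.chmod(p, mode & ~WRITE_BITS if read_only else mode | stat.S_IWUSR)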
Example no. 10
def test_ls_repos(toppath):
    # smoke test pretty much
    GitRepo(toppath + '1', create=True)
    AnnexRepo(toppath + '2', create=True)
    repos = glob(toppath + '*')
    # now make that sibling directory from which we will ls later
    mkdir(toppath)

    def _test(*args_):
        # print(args_)
        for args in args_:
            for recursive in [False, True]:
                # in both cases shouldn't fail
                with swallow_outputs() as cmo:
                    ls(args, recursive=recursive)
                    assert_equal(len(cmo.out.rstrip().split('\n')), len(args))
                    assert_in('[annex]', cmo.out)
                    assert_in('[git]', cmo.out)
                    assert_in(DEFAULT_BRANCH, cmo.out)
                    if "bogus" in args:
                        assert_in('unknown', cmo.out)

    _test(repos, repos + ["/some/bogus/file"])
    # check from within a sibling directory with relative paths
    with chpwd(toppath):
        _test([relpath(x, toppath) for x in repos])
Example no. 11
def test_symlinked_dataset_properties(repo1, repo2, repo3, non_repo, symlink):

    ds = Dataset(repo1).create()

    # now, let ds be a symlink and change that symlink to point to different
    # things:
    ar2 = AnnexRepo(repo2)
    ar3 = AnnexRepo(repo3)
    assert_true(os.path.isabs(non_repo))

    os.symlink(repo1, symlink)
    ds_link = Dataset(symlink)
    assert_is(ds_link.repo, ds.repo)  # same Repo instance
    assert_is_not(ds_link, ds)  # but not the same Dataset instance
    assert_is(ds_link.config, ds.repo.config)
    assert_true(ds_link._cfg_bound)
    assert_is_not_none(ds_link.id)
    # same id, although different Dataset instance:
    assert_equal(ds_link.id, ds.id)

    os.unlink(symlink)
    os.symlink(repo2, symlink)

    assert_is(ds_link.repo, ar2)  # same Repo instance
    assert_is(ds_link.config, ar2.config)
    assert_true(ds_link._cfg_bound)
    # id is None again, since this repository is an annex but there was no
    # Dataset.create() called yet.
    assert_is_none(ds_link.id)

    os.unlink(symlink)
    os.symlink(repo3, symlink)

    assert_is(ds_link.repo, ar3)  # same Repo instance
    assert_is(ds_link.config, ar3.config)
    assert_true(ds_link._cfg_bound)
    # id is None again, since this repository is an annex but there was no
    # Dataset.create() called yet.
    assert_is_none(ds_link.id)

    os.unlink(symlink)
    os.symlink(non_repo, symlink)

    assert_is_none(ds_link.repo)
    assert_is_not(ds_link.config, ar3.config)
    assert_false(ds_link._cfg_bound)
    assert_is_none(ds_link.id)
Example no. 12
def test_update_fetch_all(src, remote_1, remote_2):
    rmt1 = AnnexRepo(remote_1, src)
    rmt2 = AnnexRepo(remote_2, src)

    ds = Dataset(src)
    ds.add_sibling(name="sibling_1", url=remote_1)
    ds.add_sibling(name="sibling_2", url=remote_2)

    # modify the remotes:
    with open(opj(remote_1, "first.txt"), "w") as f:
        f.write("some file load")
    rmt1.add_to_annex("first.txt")
    # TODO: Modify an already present file!

    with open(opj(remote_2, "second.txt"), "w") as f:
        f.write("different file load")
    rmt2.git_add("second.txt")
    rmt2.git_commit("Add file to git.")

    # fetch all remotes
    ds.update(fetch_all=True)

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.git_get_files("sibling_1/master"))
    assert_in("second.txt", ds.repo.git_get_files("sibling_2/master"))

    # no merge strategy for multiple remotes yet:
    assert_raises(NotImplementedError, ds.update, merge=True, fetch_all=True)

    # merge a certain remote:
    ds.update(name="sibling_1", merge=True)

    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.git_get_files(ds.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    ds.repo.get_file_key("first.txt")  # raises if unknown
    eq_([False], ds.repo.file_has_content(["first.txt"]))
Example no. 13
def test_smoke_pipelines(d):
    # Just to verify that we can correctly establish the pipelines
    AnnexRepo(d, create=True)
    with chpwd(d):
        with swallow_logs():
            for p in [superdataset_pipeline(NITRC_IR)]:
                print(p)
                ok_(len(p) > 1)
Example no. 14
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert (knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example no. 15
def test_proxying_open_testrepobased(repo):
    TEST_CONTENT = "content to be annex-addurl'd"
    fname = 'test-annex.dat'
    fpath = opj(repo, fname)
    assert_raises(IOError, open, fpath)

    aio = AutomagicIO(activate=True)
    try:
        with swallow_outputs():
            # now we should be able just to request to open this file
            with open(fpath) as f:
                content = f.read()
                eq_(content, TEST_CONTENT)
    finally:
        aio.deactivate()

    # and now that we have fetched it, nothing should forbid us to open it again
    with open(fpath) as f:
        eq_(f.read(), TEST_CONTENT)

    annex = AnnexRepo(repo, create=False)
    # Let's create another file deeper under the directory with the same content
    # so it would point to the same key, which we would drop and repeat the drill
    fpath2 = opj(repo, 'd1', 'd2', 'test2.dat')
    os.makedirs(dirname(fpath2))
    with open(fpath2, 'w') as f:
        f.write(content)
    annex.add(fpath2)
    annex.drop(fpath2)
    annex.commit("added and dropped")
    assert_raises(IOError, open, fpath2)

    # Let's use context manager form
    with AutomagicIO() as aio:
        ok_(isinstance(aio, AutomagicIO))
        ok_(aio.active)
        # swallowing output would cause trouble while testing with
        # DATALAD_ASSERT_NO_OPEN_FILES mode on.  It is not 100% clear why the
        # underlying git-annex process would be dumping to stdout or stderr
        # with swallow_outputs():

        # now we should be able just to request to open this file
        with open(fpath2) as f:
            content = f.read()
            eq_(content, TEST_CONTENT)

    annex.drop(fpath2)
    assert_raises(IOError, open, fpath2)

    # Let's use relative path
    with chpwd(opj(repo, 'd1')):
        # Let's use context manager form
        with AutomagicIO() as aio, \
                swallow_outputs(), \
                open(opj('d2', 'test2.dat')) as f:
            content = f.read()
            eq_(content, TEST_CONTENT)
Example no. 16
def test_AnnexRepo_file_has_content(src, annex_path):

    ar = AnnexRepo(annex_path, src)
    testfiles = ["test-annex.dat", "test.dat"]
    assert_equal(ar.file_has_content(testfiles), [("test-annex.dat", False),
                                                  ("test.dat", False)])
    ar.annex_get("test-annex.dat")
    assert_equal(ar.file_has_content(testfiles), [("test-annex.dat", True),
                                                  ("test.dat", False)])
Example no. 17
def test_knows_annex(here, there):
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo
    GitRepo(path=here, create=True)
    assert_false(knows_annex(here))
    AnnexRepo(path=here, create=True)
    assert_true(knows_annex(here))
    GitRepo.clone(path=there, url=here, create=True)
    assert_true(knows_annex(there))
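
A plausible sketch of the check knows_annex() performs; since a fresh clone already passes, remote refs are presumably consulted as well. This is an inference, not the library's implementation.

import subprocess

def knows_annex_sketch(path):
    # assumption: a repo "knows" annex if any local or remote ref points
    # at a git-annex branch
    refs = subprocess.run(
        ['git', '-C', path, 'for-each-ref', '--format=%(refname)'],
        capture_output=True, text=True, check=True).stdout
    return any(r.endswith('/git-annex') for r in refs.splitlines())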
Example no. 18
def _test_crawl_init_error(args, template, template_func, target_value,
                           tmpdir):
    ar = AnnexRepo(tmpdir)
    with chpwd(tmpdir):
        assert_raises(target_value,
                      crawl_init,
                      args=args,
                      template=template,
                      template_func=template_func)
Example no. 19
def test_direct_cfg(path1=None, path2=None):
    # and if repo already exists and we have env var - we fail too
    # Adding backend so we get some commit into the repo
    ar = AnnexRepo(path1, create=True, backend='MD5E')
    del ar
    AnnexRepo._unique_instances.clear()  # fight flyweight
    for path in (path1, path2):
        with patch.dict('os.environ', {'DATALAD_REPO_DIRECT': 'True'}):
            # try to create an annex repo in direct mode and see how it fails
            with assert_raises(DirectModeNoLongerSupportedError) as cme:
                AnnexRepo(path, create=True)
            assert_in("no longer supported by DataLad", str(cme.value)) # we have generic part
            assert_in("datalad.repo.direct configuration", str(cme.value)) # situation specific part
    # assert not op.exists(path2)   # that we didn't create it - we do!
    #   fixing for that would be too cumbersome since we first call GitRepo.__init__
    #   with create
    ar = AnnexRepo(path1)
    # check if we somehow didn't reset the flag
    assert not ar.is_direct_mode()
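
The _unique_instances.clear() call above defeats per-path instance caching; a minimal illustrative sketch of such a flyweight cache follows (the names and mechanics here are assumptions, not DataLad's actual code).

import os

class Flyweight(type):
    # hypothetical metaclass: one instance per resolved path, which is why
    # tests must clear the cache to force a fresh look at repo state
    _unique_instances = {}

    def __call__(cls, path, *args, **kwargs):
        key = (cls, os.path.realpath(path))
        if key not in cls._unique_instances:
            cls._unique_instances[key] = super().__call__(path, *args, **kwargs)
        return cls._unique_instances[key]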
Example no. 20
def test_AnnexRepo_is_direct_mode(path):

    ar = AnnexRepo(path)
    dm = ar.is_direct_mode()
    if on_windows:
        assert_true(dm,
                    "AnnexRepo.is_direct_mode() returned false on windows.")
    else:
        assert_false(
            dm, "AnnexRepo.is_direct_mode() returned true on non-windows")
Example no. 21
def clone_url(url):
    # delay import of our code until it is actually needed
    from ..cmd import Runner
    runner = Runner()
    tdir = tempfile.mkdtemp(**get_tempfile_kwargs({}, prefix='clone_url'))
    _ = runner(["git", "clone", url, tdir], expect_stderr=True)
    if GitRepo(tdir).is_with_annex():
        AnnexRepo(tdir, init=True)
    _TEMP_PATHS_CLONES.add(tdir)
    return tdir
Example no. 22
def test_add_archive_content_zip(repo_path):
    repo = AnnexRepo(repo_path, create=True)
    with chpwd(repo_path):
        with swallow_outputs():
            repo.add(["1.zip"])
        repo.commit("add 1.zip")
        add_archive_content("1.zip")
        ok_file_under_git(opj(repo.path, "1", "foo"), annexed=True)
        ok_file_under_git(opj("1", "dir", "bar"), annexed=True)
        ok_archives_caches(repo.path, 0)
Example no. 23
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: for now, work around the detached head issue;
    # figure out what to do about it
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
Example no. 24
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # since we now base incoming on master -- and there was nothing custom
    # in master after incoming-processed, both branches should be the same
    eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # but that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'), repo.get_hexsha('incoming-processed'))

    get_branch_commits = repo.get_branch_commits_ \
        if hasattr(repo, 'get_branch_commits_') else repo.get_branch_commits
    commits = {b: list(get_branch_commits(b)) for b in branches}
    # all commits out there -- init ds + init crawler + 1*(incoming, processed)
    # The number of commits in master differs based on the create variant used
    # (the one DataLad's master makes only one commit).
    ncommits_master = len(commits["master"])
    assert_in(ncommits_master, [4, 5])
    # incoming branches off master but lacks one merge commit.
    eq_(len(commits['incoming']), ncommits_master - 1)
    # incoming-processed is on master.
    eq_(len(commits['incoming-processed']), ncommits_master)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii', './dir1/file2.nii',
    }

    eq_(set(all_files), target_files)
Example no. 25
def test_direct_cfg(path1, path2):
    # and if repo already exists and we have env var - we fail too
    # Adding backend so we get some commit into the repo
    ar = AnnexRepo(path1, create=True, backend='MD5E')
    del ar
    AnnexRepo._unique_instances.clear()  # fight flyweight
    for path in (path1, path2):
        with patch.dict('os.environ', {'DATALAD_REPO_DIRECT': 'True'}):
            # try to create an annex repo in direct mode and see how it fails
            with assert_raises(DirectModeNoLongerSupportedError) as cme:
                AnnexRepo(path, create=True)
            assert_in("no longer supported by DataLad",
                      str(cme.exception))  # we have generic part
            assert_in("datalad.repo.direct configuration",
                      str(cme.exception))  # situation specific part
    # assert not op.exists(path2)   # that we didn't create it - we do!
    #   fixing for that would be too cumbersome since we first call GitRepo.__init__
    #   with create
    ar = AnnexRepo(path1)
    # check if we somehow didn't reset the flag
    assert not ar.is_direct_mode()

    if ar.config.obtain("datalad.repo.version") >= 6:
        raise SkipTest(
            "Created repo not v5, cannot test detection of direct mode repos")
    # and if repo existed before and was in direct mode, we fail too
    # Since direct= option was deprecated entirely, we use protected method now
    ar._set_direct_mode(True)
    assert ar.is_direct_mode()
    del ar  # but we would also need to defeat the flyweight caching somehow
    with patch.dict('os.environ', {'DATALAD_REPO_DIRECT': 'True'}):
        with assert_raises(DirectModeNoLongerSupportedError) as cme:
            AnnexRepo(path1, create=False)

    # TODO: RM DIRECT -- decide what we should do here: test or blow up?
    #   ATM both tests below just pass
    ar2 = AnnexRepo(path2, create=True)
    # happily can do it since it doesn't need a worktree to do the clone
    ar2.add_submodule('sub1', url=path1)
    ar2sub1 = AnnexRepo(op.join(path2, 'sub1'))
    # but now let's convert that sub1 to direct mode
    assert not ar2sub1.is_direct_mode()
    ar2sub1._set_direct_mode(True)
    assert ar2sub1.is_direct_mode()
    del ar2
    del ar2sub1
    AnnexRepo._unique_instances.clear()  # fight flyweight

    ar2 = AnnexRepo(path2)
    ar2.get_submodules()

    # And what if we are trying to add pre-cloned repo in direct mode?
    ar2sub2 = AnnexRepo.clone(path1, op.join(path2, 'sub2'))
    ar2sub2._set_direct_mode(True)
    del ar2sub2
    AnnexRepo._unique_instances.clear()  # fight flyweight
    ar2.add('sub2')
Example no. 26
def test_ls_noarg(toppath):
    # smoke test pretty much
    AnnexRepo(toppath, create=True)

    # this test is pointless for now and until ls() actually returns
    # something
    with swallow_outputs():
        ls_out = ls(toppath)
        with chpwd(toppath):
            assert_equal(ls_out, ls([]))
            assert_equal(ls_out, ls('.'))
Example no. 27
def test_AnnexRepo_instance_from_clone(src, dst):

    ar = AnnexRepo(dst, src)
    assert_is_instance(ar, AnnexRepo, "AnnexRepo was not created.")
    assert_true(os.path.exists(os.path.join(dst, '.git', 'annex')))

    # doing it again should raise GitCommandError, since git will notice
    # there's already a git repo at that path and therefore can't clone to `dst`
    with swallow_logs() as cm:
        assert_raises(GitCommandError, AnnexRepo, dst, src)
        assert ("already exists" in cm.out)
Example no. 28
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: for now, work around the detached head issue;
    # figure out what to do about it
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, dest="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.git_checkout("TMP", "-b")
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.git_checkout("TMP", "-b")
    sub1 = GitRepo(opj(src_path, 'sub1'), create=False)
    sub2 = GitRepo(opj(src_path, 'sub2'), create=False)
    sub1.git_remote_add("target", sub1_pub)
    sub2.git_remote_add("target", sub2_pub)

    # publish recursively
    res = publish(dataset=source, dest="target", recursive=True)

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, list)
    for item in res:
        assert_is_instance(item, Dataset)
    eq_(res[0].path, src_path)
    eq_(res[1].path, sub1.path)
    eq_(res[2].path, sub2.path)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
    eq_(list(sub1_target.git_get_branch_commits("master")),
        list(sub1.git_get_branch_commits("master")))
    eq_(list(sub1_target.git_get_branch_commits("git-annex")),
        list(sub1.git_get_branch_commits("git-annex")))
    eq_(list(sub2_target.git_get_branch_commits("master")),
        list(sub2.git_get_branch_commits("master")))
    eq_(list(sub2_target.git_get_branch_commits("git-annex")),
        list(sub2.git_get_branch_commits("git-annex")))
Example no. 29
def test_AnnexRepo_crippled_filesystem(src, dst):
    # TODO: This test is rudimentary, since the platform alone does not really
    # determine the filesystem. For now this should work for the buildbots.
    # Nevertheless: find a better way to test it.

    ar = AnnexRepo(dst, src)
    if on_windows:
        assert_true(ar.is_crippled_fs(),
                    "Detected non-crippled filesystem on windows.")
    else:
        assert_false(ar.is_crippled_fs(),
                     "Detected crippled filesystem on non-windows.")
Example no. 30
def test_ignored(topdir):
    # create annex, git repos
    AnnexRepo(opj(topdir, 'annexdir'), create=True)
    GitRepo(opj(topdir, 'gitdir'), create=True)

    # non-git or annex should not be ignored
    assert_equal(ignored(topdir), False)
    # git, annex and hidden nodes should be ignored
    for subdir in ["annexdir", "gitdir", ".hidden"]:
        assert_equal(ignored(opj(topdir, subdir)), True)
    # the only_hidden flag should ignore hidden nodes only (not git or annex repos)
    assert_equal(ignored(opj(topdir, "annexdir"), only_hidden=True), False)