Example #1
def test_uninstall_git_file(path):
    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(exists(opj(path, 'INFO.txt')))
    ok_file_under_git(ds.repo.path, 'INFO.txt')

    # drop file in Git in an annex repo
    # regardless of the type of repo this is 'notneeded'...
    # it is less about education than about "can we
    # get the content back?", and for a file in Git we can
    assert_result_count(
        ds.drop(path='INFO.txt'),
        1,
        status='notneeded',
        message="no annex'ed content")

    res = ds.uninstall(path="INFO.txt", on_failure='ignore')
    assert_result_count(
        res, 1,
        status='impossible',
        message='can only uninstall datasets (consider the `drop` command)')

    # remove the file:
    res = ds.remove(path='INFO.txt', result_xfm='paths',
                    result_filter=lambda x: x['action'] == 'remove')
    assert_raises(AssertionError, ok_file_under_git, ds.repo.path, 'INFO.txt')
    ok_(not exists(opj(path, 'INFO.txt')))
    eq_(res, ['INFO.txt'])
Example #2
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
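
A note on the two call styles used throughout these examples: assert_raises
is either given the expected exception, the callable, and its arguments, or
used as a context manager when the raised exception needs to be inspected
afterwards. A minimal, self-contained sketch of both styles (using the
nose-style helper these tests rely on; the values here are illustrative):

from nose.tools import assert_raises

# callable form: expected exception, the callable, then its arguments
assert_raises(ZeroDivisionError, lambda x: 1 / x, 0)

# context-manager form: the exception instance is available afterwards
with assert_raises(ValueError) as cme:
    int("not a number")
assert "invalid literal" in str(cme.exception)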
Example #3
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')

    ok_(subds.is_installed())
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious one: install an annex file within a not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))
    # we shouldn't be able to silently ignore an attempt to provide a source
    # while "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'), source="http://bogusbogus")
Example #4
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    with swallow_logs(new_level=logging.WARNING) as cml:
        unlock(dataset=None, path=path2)
        assert_in("ignored paths that do not belong to any dataset: ['{0}'".format(path2),
                  cml.out)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.unlock(path="notexistent.txt")
        assert_in("ignored non-existing paths", cml.out)

    chpwd(_cwd)
Example #5
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    # deinit of the modified submodule must fail without force:
    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
Example #6
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible', publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
Example #7
def test_unlock_raises(path, path2, path3):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)
Example #8
def test_fail_with_short_help():
    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(exit_code=3, out=out)
    assert_equal(cme.exception.code, 3)
    assert_equal(out.getvalue(), "")

    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(msg="Failed badly", out=out)
    assert_equal(cme.exception.code, 1)
    assert_equal(out.getvalue(), "error: Failed badly\n")

    # Suggestions, hint, etc
    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(
            msg="Failed badly",
            known=["mother", "mutter", "father", "son"],
            provided="muther",
            hint="You can become one",
            exit_code=0,  # no one forbids
            what="parent",
            out=out)
    assert_equal(cme.exception.code, 0)
    assert_equal(out.getvalue(),
                 "error: Failed badly\n"
                 "datalad: Unknown parent 'muther'.  See 'datalad --help'.\n\n"
                 "Did you mean any of these?\n"
                 "        mutter\n"
                 "        mother\n"
                 "        father\n"
                 "Hint: You can become one\n")
Example #9
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
Example #10
def test_install_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.get_subdatasets())
    # sub is clean:
    ok_clean_git(subds.path, annex=False)
    # top is not:
    assert_raises(AssertionError, ok_clean_git, ds.path, annex=False)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=False)

    # but we could also save while installing, and there should be no
    # side-effect of saving any other changes if we ask not to auto-save them
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source, if_dirty='ignore')
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example #11
def test_GitRepo_get_merge_base(src):
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do:  https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert(repo.get_indexed_files())  # we did commit
    assert(repo.get_merge_base(branch1) is None)
    assert(repo.get_merge_base([branch2, branch1]) is None)

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if it points to some empty/non-existing branch, it should also be None
    assert(repo.get_merge_base(['nonexistent', branch2]) is None)
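
# In plain git terms, the behavior exercised above corresponds roughly to
# (a sketch, assuming a checkout with both branches present):
#
#   git merge-base <branch1> <branch2>    # unrelated histories: no output
#   git merge --allow-unrelated-histories <branch1>
#   git merge-base <branch1> <branch2>    # now reports the shared commit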
Example #12
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        # the warning lists both skipped paths at once
        cml.assert_logged(msg="ignored non-existing paths: {}\n".format(
            [opj(ds.path, 'not_existing'), path_outside]),
            regex=False, level='WARNING')
        ok_(isinstance(result, list))
        eq_(len(result), 2)
        for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
            assert_in(sub, result)
            ok_(sub.is_installed())

    # the return of get is always a list, even if just one thing was gotten;
    # in this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')
Example #13
def test_install_list(path, top_path):

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
Example #14
def test_subdatasets(path):
    from datalad.api import install
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_dataset_handles(), None)
    ds = ds.install()
    assert_true(ds.is_installed())
    eq_(ds.get_dataset_handles(), [])
    # create some file and commit it
    with open(os.path.join(ds.path, 'test'), 'w') as f:
        f.write('some')
    ds.install(path='test')
    assert_true(ds.is_installed())
    # TODO change to remember_state()
    ds.remember_state("Hello!", version=1)
    # add a subdataset
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    subdss = ds.get_dataset_handles()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_dataset_handles(absolute=True)[0])
    eq_(subdss, ds.get_dataset_handles(recursive=True))
    eq_(subdss, ds.get_dataset_handles(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_dataset_handles, pattern='sub*')
    ds.remember_state("with subds", version=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_dataset_handles(), [])
Example #15
def test_composite_credential1():
    # basic test of composite credential
    keyring = MemoryKeyring()
    cred = _CCred1("name", keyring=keyring)
    # When queried, does the chain
    assert_equal(cred(), {'user': '******', 'password': '******'})
    # But the "Front" credential is exposed to the user
    assert_equal(cred.get('user'), 'user1')
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_raises(ValueError, cred.get, 'unknown_field')
    assert_equal(cred.get('password'), 'password1')
    assert_equal(keyring.get('name', 'password'), 'password1')
    # ATM composite credential stores "derived" ones unconditionally in the
    # keyring as well
    assert_equal(keyring.get('name:1', 'user'), 'user1_1')
    assert_equal(keyring.get('name:1', 'password'), 'password1_2')

    # and now enter new should remove "derived" entries
    cred.enter_new()
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'password2')
    assert_equal(keyring.get('name:1', 'user'), None)
    assert_equal(keyring.get('name:1', 'password'), None)
    # which would get reevaluated if requested
    assert_equal(keyring.entries, {'name:1': {}, 'name': {'user': '******', 'password': '******'}})
    assert_equal(cred(), {'user': '******', 'password': '******'})
Example #16
def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
Example #17
def _test_match_basic(matcher, query):
    extracts = dict(
        xpaths={'text': 'text()'},
        csss={'favorite': '.class1::text'}
    )
    m = matcher(query, **extracts)

    mg = m(dict(response="<div></div>"))
    ok_(inspect.isgenerator(mg))
    eq_(list(mg), [])  # there should be no hits

    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    hits = list(mg)
    eq_(len(hits), 3)
    for hit, a_html, a_text, class1_text in zip(
            hits, sample1.a_htmls, sample1.a_texts, sample1.class1_texts):
        ok_(hit['response'])
        eq_(hit['match'], a_html)
        eq_(hit['text'], a_text)
        eq_(hit.get('favorite', None), class1_text)

    m = matcher(query, min_count=4, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)

    m = matcher(query, max_count=2, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)
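
# (sample1 is a fixture defined elsewhere; judging from the assertions it
# holds a page with three <a> elements whose html and text, plus the text
# of a '.class1' element, are extracted via the 'xpaths'/'csss' specs above.)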
Example #18
def test_optimized_cloning(path):
    # make test repo with one file and one commit
    originpath = op.join(path, 'origin')
    repo = GitRepo(originpath, create=True)
    with open(op.join(originpath, 'test'), 'w') as f:
        f.write('some')
    repo.add('test')
    repo.commit('init')
    ok_clean_git(originpath, annex=False)
    from glob import glob

    def _get_inodes(repo):
        return dict(
            [(os.path.join(*o.split(os.sep)[-2:]),
              os.stat(o).st_ino)
             for o in glob(os.path.join(repo.path,
                                        repo.get_git_dir(repo),
                                        'objects', '*', '*'))])

    origin_inodes = _get_inodes(repo)
    # now clone it in different ways and see what happens to the object storage
    from datalad.support.network import get_local_file_url
    clonepath = op.join(path, 'clone')
    for src in (originpath, get_local_file_url(originpath)):
        # deprecated
        assert_raises(DeprecatedError, GitRepo, url=src, path=clonepath)
        clone = GitRepo.clone(url=src, path=clonepath, create=True)
        clone_inodes = _get_inodes(clone)
        eq_(origin_inodes, clone_inodes, msg='with src={}'.format(src))
        rmtree(clonepath)
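
# Identical inode numbers for the object files mean the clone's object
# store was hardlinked rather than copied -- the "optimized cloning" the
# test name refers to.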
Example #19
def test_GitRepo_gitignore(path):

    gr = GitRepo(path, create=True)
    sub = GitRepo(op.join(path, 'ignore-sub.me'))
    # we need to commit something, otherwise add_submodule
    # will already refuse the submodule for having no commit
    sub.add('a_file.txt')
    sub.commit()

    from ..exceptions import GitIgnoreError

    with open(op.join(path, '.gitignore'), "w") as f:
        f.write("*.me")

    with assert_raises(GitIgnoreError) as cme:
        gr.add('ignore.me')
    eq_(cme.exception.paths, ['ignore.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add_submodule(path='ignore-sub.me')
    eq_(cme.exception.paths, ['ignore-sub.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add(['ignore.me', 'dontigno.re', op.join('ignore-sub.me', 'a_file.txt')])
    eq_(set(cme.exception.paths), {'ignore.me', 'ignore-sub.me'})

    eq_(gr.get_gitattributes('.')['.'], {})  # nothing is recorded within .gitattributes
Example #20
def test_ssh_get_connection():

    manager = SSHManager()
    assert manager._socket_dir is None, \
        "Should be unset upon initialization. Got %s" % str(manager._socket_dir)
    c1 = manager.get_connection('ssh://localhost')
    assert manager._socket_dir, "Should be set after interactions with the manager"
    assert_is_instance(c1, SSHConnection)

    # subsequent call returns the very same instance:
    ok_(manager.get_connection('ssh://localhost') is c1)

    # fail on malformed URLs (meaning: our fancy URL parser can't correctly
    # deal with them):
    #assert_raises(ValueError, manager.get_connection, 'localhost')
    # we now allow those simple specifications of host to get_connection
    c2 = manager.get_connection('localhost')
    assert_is_instance(c2, SSHConnection)

    # but should fail if it looks like something else
    assert_raises(ValueError, manager.get_connection, 'localhost/')
    assert_raises(ValueError, manager.get_connection, ':localhost')

    # we can do what urlparse cannot
    # assert_raises(ValueError, manager.get_connection, 'someone@localhost')
    # next one is considered a proper url by urlparse (netloc:'',
    # path='/localhost'), but eventually gets turned into SSHRI(hostname='ssh',
    # path='/localhost') -- which is fair IMHO -> invalid test
    # assert_raises(ValueError, manager.get_connection, 'ssh:/localhost')

    manager.close()
Example #21
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible' status for both invalid args
    # but all the other tasks have been accomplished
    for skipped, msg in [(opj(ds.path, 'not_existing'), "path does not exist"),
                         (path_outside, "path not associated with any dataset")]:
        assert_result_count(
            result, 1, status='impossible', message=msg, path=skipped)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # the return of get is always a list by default, even if just one thing
    # was gotten; in this case 'subm 1' was already obtained above, so this
    # will get the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
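
# For reference, a datalad result record is a plain dict; a hypothetical
# instance of the shape assert_result_count matches against above:
# {'action': 'install', 'status': 'impossible',
#  'message': 'path does not exist',
#  'path': '/tmp/ds/not_existing', 'type': 'dataset'}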
Example #22
def test_implicit_install(src, dst):

    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install, source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False, result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
Example #23
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #24
    def test_addurls_url_filename_fail(self, path):
        ds = Dataset(path).create(force=True)
        with chpwd(path):
            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file,
                          "{url}/nofilename/",
                          "{_url0}/{_url_filename}")
Example #25
def test_uninstall_invalid(path):
    ds = Dataset(path).create(force=True)
    for method in (uninstall, remove, drop):
        assert_raises(InsufficientArgumentsError, method)
        # refuse to touch stuff outside the dataset
        assert_status('error', method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error', method(dataset=ds, path='../madeupnonexist', on_failure='ignore'))
Example #26
def test_split_remote_branch():
    r, b = split_remote_branch("MyRemote/SimpleBranch")
    eq_(r, "MyRemote")
    eq_(b, "SimpleBranch")
    r, b = split_remote_branch("MyRemote/Branch/with/slashes")
    eq_(r, "MyRemote")
    eq_(b, "Branch/with/slashes")
    assert_raises(AssertionError, split_remote_branch, "NoSlashesAtAll")
    assert_raises(AssertionError, split_remote_branch, "TrailingSlash/")
Example #27
def test_invalid_call(path):
    # no dataset
    assert_raises(ValueError, create_sibling_github, 'bogus', dataset=path)
    ds = Dataset(path).create()
    # no user
    assert_raises(gh.BadCredentialsException,
                  ds.create_sibling_github,
                  'bogus',
                  github_login='******')
Example #28
def test_invalid_call(path):
    # inter-option dependencies
    assert_raises(
        ValueError,
        annotate_paths, '',
        force_subds_discovery=True, force_parentds_discovery=False)
    # modified_since needs an actual dataset
    assert_raises(
        ValueError,
        annotate_paths, dataset=path, modified="something")
Example #29
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta,
                                  {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(
                ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
                action="addurls",
                status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file, "{url}", "{name}")
Example #30
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, merge with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets', force=True)
    eq_(res, [source])
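
# In plain git terms: after `git commit --amend` the target's branch no
# longer fast-forwards onto the amended history, so an ordinary push is
# rejected; force=True presumably maps to a forced push (git push --force).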
Example #31
def test_invalid_url(path):

    assert_raises(IncompleteResultsError, clone, 'osf://q8xnk/somepath', path)
Example #32
def test_obtain(path):
    ds = create(path)
    cfg = ConfigManager(ds)
    dummy = 'datalad.test.dummy'
    # we know nothing and we don't know how to ask
    assert_raises(RuntimeError, cfg.obtain, dummy)
    # can report known ones
    cfg.add(dummy, '5.3')
    assert_equal(cfg.obtain(dummy), '5.3')
    # better type
    assert_equal(cfg.obtain(dummy, valtype=float), 5.3)
    # don't hide type issues, float doesn't become an int magically
    assert_raises(ValueError, cfg.obtain, dummy, valtype=int)
    # inject some prior knowledge
    from datalad.interface.common_cfg import definitions as cfg_defs
    cfg_defs[dummy] = dict(type=float)
    # now we don't need to specify a type anymore
    assert_equal(cfg.obtain(dummy), 5.3)
    # but if we remove the value from the config, all magic is gone
    cfg.unset(dummy)
    # we know nothing and we don't know how to ask
    assert_raises(RuntimeError, cfg.obtain, dummy)

    #
    # test actual interaction
    #
    @with_testsui()
    def ask():
        # fail on unknown dialog type
        assert_raises(ValueError, cfg.obtain, dummy, dialog_type='Rorschach_test')
    ask()

    # ask nicely, and get a value of proper type using the preconfiguration
    @with_testsui(responses='5.3')
    def ask():
        assert_equal(
            cfg.obtain(dummy, dialog_type='question', text='Tell me'), 5.3)
    ask()

    # preconfigure even more, to get the most compact call
    cfg_defs[dummy]['ui'] = ('question', dict(text='tell me', title='Gretchen Frage'))

    @with_testsui(responses='5.3')
    def ask():
        assert_equal(cfg.obtain(dummy), 5.3)
    ask()

    @with_testsui(responses='murks')
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy)
    ask()

    # fail to store when destination is not specified, will not even ask
    @with_testsui()
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy, store=True)
    ask()

    # but we can preconfigure it
    cfg_defs[dummy]['destination'] = 'broken'

    @with_testsui(responses='5.3')
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy, store=True)
    ask()

    # fixup destination
    cfg_defs[dummy]['destination'] = 'dataset'

    @with_testsui(responses='5.3')
    def ask():
        assert_equal(cfg.obtain(dummy, store=True), 5.3)
    ask()

    # now it won't have to ask again
    @with_testsui()
    def ask():
        assert_equal(cfg.obtain(dummy), 5.3)
    ask()

    # wipe it out again
    cfg.unset(dummy)
    assert_not_in(dummy, cfg)
Example #33
def test_something(path, new_home):
    # read nothing, has nothing
    cfg = ConfigManager(dataset_only=True)
    assert_false(len(cfg))
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), dataset_only=True)
    assert_equal(len(cfg), 3)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'], ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()), ['onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')), ['myint', 'user'])
    assert_equal(cfg.options('onemore.complicated の beast with.dot'), ['findme'])

    assert_equal(
        sorted(cfg.items()),
        [('onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(
        cfg.get('something.user'),
        ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(cfg.getfloat('onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'), cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)
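
    # For orientation, the example config read above would look roughly
    # like this in git-config syntax (reconstructed from the assertions;
    # the email value is elided):
    #
    #   [something]
    #       user = name=Jane Doe
    #       user = email=...
    #       myint = 3
    #   [onemore "complicated の beast with.dot"]
    #       findme = 5.0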

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), 'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of the section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ',
                    {'HOME': new_home, 'DATALAD_SNEAKY_ADDITION': 'ignore'}):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert(not exists(global_gitconfig))
        globalcfg = ConfigManager(dataset_only=False)
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert(exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset,
                'datalad.unittest.youcan',
                where='local')
        assert(globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        # although it does leave an empty section behind in the file
        ok_file_has_content(global_gitconfig, '[datalad "unittest"]', strip=True)
        # remove_section to clean it up entirely
        globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')),
        dataset_only=True,
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
Example #34
def test_siblings(origin, repo_path, local_clone_path):

    sshurl = "ssh://push-remote.example.com"
    httpurl1 = "http://remote1.example.com/location"
    httpurl2 = "http://remote2.example.com/location"

    # insufficient arguments
    # we need a dataset to work at
    with chpwd(repo_path):  # not yet there
        assert_raises(InsufficientArgumentsError,
                      siblings,
                      'add',
                      url=httpurl1)

    # prepare src
    source = install(repo_path, source=origin, recursive=True)
    # pollute config
    depvar = 'remote.test-remote.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')

    # cannot configure unknown remotes as dependencies
    res = siblings('configure',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1,
                   publish_depends=['r1', 'r2'],
                   on_failure='ignore',
                   result_renderer=None)
    assert_status('error', res)
    eq_(res[0]['message'],
        ('unknown sibling(s) specified as publication dependency: %s',
         set(('r1', 'r2'))))
    # prior config was not changed by failed call above
    eq_(source.config.get(depvar, None), 'stupid')

    res = siblings('configure',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1,
                   result_xfm='paths',
                   result_renderer=None)

    eq_(res, [source.path])
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1, source.repo.get_remote_url("test-remote"))

    # reconfiguring doesn't change anything
    siblings('configure',
             dataset=source,
             name="test-remote",
             url=httpurl1,
             result_renderer=None)
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1, source.repo.get_remote_url("test-remote"))
    # re-adding doesn't work
    res = siblings('add',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1,
                   on_failure='ignore',
                   result_renderer=None)
    assert_status('error', res)
    # only after removal
    res = siblings('remove',
                   dataset=source,
                   name="test-remote",
                   result_renderer=None)
    assert_status('ok', res)
    assert_not_in("test-remote", source.repo.get_remotes())
    res = siblings('add',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1,
                   on_failure='ignore',
                   result_renderer=None)
    assert_status('ok', res)

    # add another remote, automagically taking its name from the url,
    # while being in the dataset directory
    with chpwd(source.path):
        res = siblings('add', url=httpurl2, result_renderer=None)
    assert_result_count(res, 1, name="remote2.example.com", type='sibling')
    assert_in("remote2.example.com", source.repo.get_remotes())

    # don't fail with conflicting url, when using force:
    res = siblings('configure',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere", source.repo.get_remote_url("test-remote"))

    # no longer a use case; I would need additional convincing that
    # this is anyhow useful other than triple-checking other people's
    # errors. for an actual check use 'query'
    # maybe it could be turned into a set of warnings when `configure`
    # alters an existing setting, but then why call configure, if you
    # want to keep the old values
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere")
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))
    ## add a push url without force fails, since in a way the fetch url is the
    ## configured push url, too, in that case:
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere",
    #                pushurl=sshurl, force=False)
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))

    # add push url (force):
    res = siblings('configure',
                   dataset=source,
                   name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   pushurl=sshurl,
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere", source.repo.get_remote_url("test-remote"))
    eq_(sshurl, source.repo.get_remote_url("test-remote", push=True))

    # recursively:
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote",
            url=httpurl1 + "/%NAME",
            pushurl=sshurl + "/%NAME",
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote", repo.get_remotes())
        url = repo.get_remote_url("test-remote")
        pushurl = repo.get_remote_url("test-remote", push=True)
        ok_(url.startswith(httpurl1 + '/' + basename(source.path)))
        ok_(url.endswith(basename(repo.path)))
        ok_(pushurl.startswith(sshurl + '/' + basename(source.path)))
        ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template:
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote-2",
            url=httpurl1,
            pushurl=sshurl,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-2", repo.get_remotes())
        url = repo.get_remote_url("test-remote-2")
        pushurl = repo.get_remote_url("test-remote-2", push=True)
        ok_(url.startswith(httpurl1))
        ok_(pushurl.startswith(sshurl))
        # FIXME: next condition used to compare the *Repo objects instead of
        # their paths. Due to missing annex-init in
        # datalad/tests/utils.py:clone_url this might not be the same, since
        # `source` actually is an annex, but after flavor 'clone' in
        # `with_testrepos` and then `install` any trace of an annex might be
        # gone in v5 (branch 'master' only), while in direct mode it still is
        # considered an annex. `repo` is forced to be a `GitRepo`, so we might
        # compare two objects of different classes while they actually are
        # pointing to the same repository.
        # See github issue #1854
        if repo.path != source.repo.path:
            ok_(url.endswith('/' + basename(repo.path)))
            ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template and pushurl but full "hierarchy"
    # to a local clone
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote-3",
            url=local_clone_path,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-3", repo.get_remotes())
        url = repo.get_remote_url("test-remote-3")
        pushurl = repo.get_remote_url("test-remote-3", push=True)

        eq_(
            normpath(url),
            normpath(
                opj(local_clone_path, relpath(str(r['path']), source.path))))
        # https://github.com/datalad/datalad/issues/3951
        ok_(not pushurl)  # no pushurl should be defined
    # 5621: Users shouldn't pass identical names for remote & common data source
    assert_raises(ValueError,
                  siblings,
                  'add',
                  dataset=source,
                  name='howdy',
                  url=httpurl1,
                  as_common_datasrc='howdy')
Example #35
def test_create_raises(path, outside_path):

    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')
    assert_raises(ValueError, ds.create, no_annex=True, annex_opts=['some'])
    assert_raises(ValueError, ds.create, no_annex=True, annex_init_opts=['some'])

    with open(opj(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_raises(ValueError, ds.create, force=False)
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_raises(ValueError, ds.create, outside_path)

    # create a sub:
    ds.create('sub')
    # fail when doing it again without `force`:
    assert_raises(ValueError, ds.create, 'sub')
Example #36
def test_within_ds_file_search(path):
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add('datalad.search.index-{}-documenttype'.format(m),
                      'all',
                      where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('audio.mp3', opj('stim', 'stim1.mp3')), ):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', src),
             opj(path, dst))
    ds.save()
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If it is not under annex, the addition of metadata below silently
    # does not do anything
    ds.repo.set_metadata(opj('stim', 'stim1.mp3'), init={'importance': 'very'})
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio', ):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # test default behavior while limiting set of keys reported
    with swallow_outputs() as cmo:
        ds.search(['\.id', 'artist$'], show_keys='short')
        out_lines = [l for l in cmo.out.split(os.linesep) if l]
        # test that only the ones matching were returned
        assert_equal([l for l in out_lines if not l.startswith(' ')],
                     ['audio.music-artist', 'datalad_core.id'])
        # more specific test which would also test formatting
        assert_equal(
            out_lines,
            [
                'audio.music-artist',
                ' in  1 datasets',
                " has 1 unique values: 'dlartist'",
                'datalad_core.id',
                ' in  1 datasets',
                # we have them sorted
                " has 1 unique values: '%s'" % ds.id
            ])

    with assert_raises(ValueError) as cme:
        ds.search('*wrong')
    assert_re_in(
        r"regular expression '\(\?i\)\*wrong' \(original: '\*wrong'\) is incorrect: ",
        str(cme.exception))

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
        ('egrep', ':mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above, the leading : is stripped; it indicates "ALL FIELDS"
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above, but with AND condition
            # get both matches
        ('egrep', ['mp3', 'type:file'], opj('stim', 'stim1.mp3'), {
            'type': 'file',
            'audio.format': 'mp3'
        }),
            # case insensitive search
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # field selection by expression
        ('egrep', r'audio\.+:mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # random keyword query
        ('textblob', 'mp3', opj('stim', 'stim1.mp3'), {
            'meta': 'mp3'
        }),
            # report which field matched with auto-field
        ('autofield', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # XXX next one is not supported by current text field analyser
            # decomposes the mime type in [mime, audio, mp3]
            # ('autofield',
            # "'mime:audio/mp3'",
            # opj('stim', 'stim1.mp3'),
            # 'audio.format', 'mime:audio/mp3'),
            # but this one works
        ('autofield', "'mime audio mp3'", opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        assert_result_count(
            res,
            1,
            type='file',
            path=opj(ds.path, hitpath),
            # each file must report the ID of the dataset it is from, critical for
            # discovering related content
            dsid=ds.id)
        # in egrep we currently do not search unique values
        # and the queries above aim at files
        assert_result_count(res, 1 if mode == 'egrep' else 2)
        if mode != 'egrep':
            assert_result_count(res,
                                1,
                                type='dataset',
                                path=ds.path,
                                dsid=ds.id)
        # test the key and specific value of the match
        for matched_key, matched_val in matched.items():
            assert_in(matched_key, res[-1]['query_matched'])
            assert_equal(res[-1]['query_matched'][matched_key], matched_val)

    # test a suggestion msg being logged if no hits and key is a bit off
    with swallow_logs(new_level=logging.INFO) as cml:
        res = ds.search('audio.formats:mp3 audio.bitsrate:1', mode='egrep')
        assert not res
        assert_in('Did you mean any of', cml.out)
        assert_in('audio.format', cml.out)
        assert_in('audio.bitrate', cml.out)
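
The "Did you mean" hint tested at the end is plain fuzzy matching over the known
keys. A minimal sketch with the standard library; the key list here is a
synthetic stand-in for the aggregated metadata index, not datalad's actual one:

import difflib

known_keys = ['audio.format', 'audio.bitrate', 'audio.duration(s)']

def suggest_keys(query_key, keys=known_keys, n=3, cutoff=0.6):
    # difflib ranks candidates by similarity, so near-misses map back
    # to their intended key
    return difflib.get_close_matches(query_key, keys, n=n, cutoff=cutoff)

assert 'audio.format' in suggest_keys('audio.formats')
assert 'audio.bitrate' in suggest_keys('audio.bitsrate')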
Beispiel #37
0
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    report[-1]["commit"] == ds.repo.get_hexsha()

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(op.join(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.save("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    assert_repo_status(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
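
rerun works off run records embedded in commit messages, which get_run_info
parses back out. A rough sketch of that extraction; the delimiter strings mirror
what [DATALAD RUNCMD] commits look like, but treat the exact format as an
assumption rather than a stable API:

import json
import re

RECORD_RE = re.compile(
    r"=== Do not change lines below ===\n(.*)\n\^\^\^ Do not change lines above \^\^\^",
    re.DOTALL)

def parse_run_record(commit_message):
    # pull the JSON payload from between the record delimiters
    match = RECORD_RE.search(commit_message)
    return json.loads(match.group(1)) if match else None

msg = """[DATALAD RUNCMD] rerun buried

=== Do not change lines below ===
{"cmd": "echo x$(cat sub/sequence) > sub/sequence", "dsid": "0000"}
^^^ Do not change lines above ^^^
"""
record = parse_run_record(msg)
assert record["cmd"].startswith("echo")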
Beispiel #38
0
def test_assert_git_annex_branch_published(path):
    repo_a = AnnexRepo(opj(path, "a"), create=True)
    repo_b = AnnexRepo(opj(path, "b"), create=True)
    with assert_raises(AssertionError):
        assert_git_annex_branch_published(repo_a, repo_b)
Beispiel #39
0
def test_search_outside1_noninteractive_ui(tdir):
    # we should raise an informative exception
    with chpwd(tdir):
        with assert_raises(NoDatasetFound) as cme:
            list(search("bu"))
        assert_in('run interactively', str(cme.exception))
Beispiel #40
0
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing remote, which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote(DEFAULT_REMOTE)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'),
                to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish,
                  dataset=source,
                  to='target',
                  result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source,
                  to='target',
                  result_xfm='datasets',
                  force=True)
    eq_(res, [source])
Beispiel #41
0
def test_runner_failure(dir_):
    runner = Runner()
    with assert_raises(CommandError) as cme:
        runner.run(py2cmd('import sys; sys.exit(53)'))
    eq_(53, cme.exception.code)
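
py2cmd is a datalad test helper; presumably it wraps a Python snippet into an
argv list for the Runner. A hypothetical equivalent, checking that the exit
code round-trips through a subprocess:

import subprocess
import sys

def py2cmd_sketch(code):
    # run the snippet with the same interpreter as the test process
    return [sys.executable, '-c', code]

proc = subprocess.run(py2cmd_sketch('import sys; sys.exit(53)'))
assert proc.returncode == 53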
Beispiel #42
0
def test_repo_diff(path, norepo):
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    assert_raises(ValueError, ds.repo.diff, fr='WTF', to='MIKE')

    if ds.repo.is_managed_branch():
        fr_base = DEFAULT_BRANCH
        to = DEFAULT_BRANCH
    else:
        fr_base = "HEAD"
        to = None

    # no diff
    eq_(ds.repo.diff(fr_base, to), {})
    # bogus path makes no difference
    eq_(ds.repo.diff(fr_base, to, paths=['THIS']), {})
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    eq_(
        ds.repo.diff(fr=fr_base + '~1', to=fr_base), {
            ut.Path(ds.repo.pathobj / 'new'): {
                'state': 'added',
                'type': 'file',
                'bytesize': 5,
                'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'
            }
        })
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    eq_(
        ds.repo.diff(fr='HEAD', to=None),
        {
            ut.Path(ds.repo.pathobj / 'new'): {
                'state': 'modified',
                'type': 'file',
                # the beast is modified, but no change in shasum -> not staged
                'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6',
                'prev_gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'
            }
        })
    # per path query gives the same result
    eq_(ds.repo.diff(fr=fr_base, to=to),
        ds.repo.diff(fr=fr_base, to=to, paths=['new']))
    # also given a directory as a constraint does the same
    eq_(ds.repo.diff(fr=fr_base, to=to),
        ds.repo.diff(fr=fr_base, to=to, paths=['.']))
    # but if we give another path, it doesn't show up
    eq_(ds.repo.diff(fr=fr_base, to=to, paths=['other']), {})

    # make clean
    ds.save()
    assert_repo_status(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # default is to report all files
    eq_(
        ds.repo.diff(fr='HEAD', to=None), {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'
            },
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'
            }
        })
    # but can be made more compact
    eq_(
        ds.repo.diff(fr='HEAD', to=None, untracked='normal'), {
            ut.Path(ds.repo.pathobj / 'deep'): {
                'state': 'untracked',
                'type': 'directory'
            }
        })

    # again, an unmatching path constraint will give an empty report
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
    # perfect match and anything underneath will do
    eq_(
        ds.repo.diff(fr='HEAD', to=None, paths=['deep']), {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'
            },
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'
            }
        })
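
The return value of repo.diff() asserted above is just a path-to-properties
mapping, which makes post-processing trivial. A small helper over that shape;
the demo dict is synthetic:

from pathlib import Path

def paths_in_state(diff, state):
    # repo.diff() returns a plain {Path: {'state': ..., 'type': ...}} mapping
    return sorted(p for p, props in diff.items() if props['state'] == state)

demo = {
    Path('deep/down'): {'state': 'untracked', 'type': 'file'},
    Path('new'): {'state': 'modified', 'type': 'file'},
}
assert paths_in_state(demo, 'untracked') == [Path('deep/down')]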
Beispiel #43
0
def test_failed_install(dspath):
    ds = create(dspath)
    assert_raises(IncompleteResultsError,
                  ds.install,
                  "sub",
                  source="http://nonexistingreallyanything.somewhere/bla")
Beispiel #44
0
def _test_proxying_open(generate_load, verify_load, repo):
    annex = AnnexRepo(repo, create=True)
    fpath1 = opj(repo, "test")
    fpath2 = opj(repo, 'd1', 'd2', 'test2')
    # generate load
    fpath1 = generate_load(fpath1) or fpath1
    os.makedirs(dirname(fpath2))
    fpath2 = generate_load(fpath2) or fpath2
    annex.add([fpath1, fpath2])
    verify_load(fpath1)
    verify_load(fpath2)
    annex.commit("Added some files")

    # clone to another repo
    repo2 = repo + "_2"
    annex2 = AnnexRepo.clone(repo, repo2)
    # verify that can't access
    fpath1_2 = fpath1.replace(repo, repo2)
    fpath2_2 = fpath2.replace(repo, repo2)

    EXPECTED_EXCEPTIONS = (IOError, OSError)
    assert_raises(EXPECTED_EXCEPTIONS, verify_load, fpath1_2)

    with AutomagicIO():
        # verify that it doesn't even try to get files which do not exist
        with patch('datalad.support.annexrepo.AnnexRepo.get') as gricm:
            # if we request absent file
            assert_raises(EXPECTED_EXCEPTIONS, open, fpath1_2+"_", 'r')
            # no get should be called
            assert_false(gricm.called)
        verify_load(fpath1_2)
        verify_load(fpath2_2)
        # and even if we drop it -- we still can get it no problem
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        verify_load(fpath2_2)
        assert_true(annex2.file_has_content(fpath2_2))
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        assert_true(os.path.isfile(fpath2_2))

    # In check_once mode, if we drop it, it wouldn't be considered again
    annex2.drop(fpath2_2)
    assert_false(annex2.file_has_content(fpath2_2))
    with AutomagicIO(check_once=True):
        verify_load(fpath2_2)
        assert_true(annex2.file_has_content(fpath2_2))
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        assert_false(os.path.isfile(fpath2_2))


    # if we override stdout with something not supporting fileno(), as tornado
    # does (which ruins using get under IPython), things must still work
    # TODO: we might need to refuse any online logging in other places like that
    annex2.drop(fpath2_2)
    class StringIOfileno(StringIO):
        def fileno(self):
            raise Exception("I have no clue how to do fileno")

    with patch('sys.stdout', new_callable=StringIOfileno), \
         patch('sys.stderr', new_callable=StringIOfileno):
        with AutomagicIO():
            assert_false(annex2.file_has_content(fpath2_2))
            verify_load(fpath2_2)
            assert_true(annex2.file_has_content(fpath2_2))
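
AutomagicIO's core idea, intercepting open() and fetching content on demand,
can be sketched with a context manager. The fetch callback below is a stand-in
for AnnexRepo.get, not datalad's actual implementation:

import builtins
import os
import tempfile
from contextlib import contextmanager

@contextmanager
def automagic_open(fetch):
    # swap builtins.open for a wrapper that first offers the path to a
    # fetch callback, then delegates to the real open
    real_open = builtins.open

    def opener(file, mode='r', *args, **kwargs):
        if 'r' in mode:
            fetch(file)
        return real_open(file, mode, *args, **kwargs)

    builtins.open = opener
    try:
        yield
    finally:
        builtins.open = real_open

fetched = []
with tempfile.TemporaryDirectory() as tmp:
    probe = os.path.join(tmp, 'probe.txt')
    with open(probe, 'w') as f:
        f.write('hi')
    with automagic_open(fetched.append):
        assert open(probe).read() == 'hi'
assert fetched == [probe]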
Beispiel #45
0
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    # no all_changes any longer
    with assert_raises(DeprecatedError):
        ds.save("add a new file", all_changes=True)

    ds.save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommitted .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
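
The repeated ds.repo.dirty / ok_clean_git checks above amount to asking git
whether anything is modified, staged, or untracked. A direct CLI equivalent,
not datalad's actual implementation:

import subprocess

def is_dirty(repo_path):
    # any porcelain output means modified, staged, or untracked content
    out = subprocess.run(
        ['git', 'status', '--porcelain'],
        cwd=repo_path, capture_output=True, text=True, check=True)
    return bool(out.stdout.strip())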
Beispiel #46
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://localhost:22",
                       target_dir=target_path,
                       ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target should be known under a proper UUID within the source, if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   name="local_target_alt",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir" %
            target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        with open(opj(target_path, 'random'), 'w') as f:
            f.write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace',
            ui=True,
        )
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        assert_publish_with_ui(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have an easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on older git versions we don't change the receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that behavior has changed a bit with some recent git versions
        # and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
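
get_mtimes_and_digests is assumed here to walk the published tree and
fingerprint every file; a minimal stand-in using hashlib and os.walk:

import hashlib
import os

def mtimes_and_digests(top):
    digests, mtimes = {}, {}
    for root, _dirs, files in os.walk(top):
        for name in files:
            full = os.path.join(root, name)
            rel = os.path.relpath(full, top)
            with open(full, 'rb') as f:
                digests[rel] = hashlib.sha256(f.read()).hexdigest()
            mtimes[rel] = os.stat(full).st_mtime
    return digests, mtimes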
Beispiel #47
0
def test_unlock(path):

    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in V6+ we can unlock even if the file's content isn't present:
    if ds.repo.supports_unlocked_pointers:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat writeable in V6!
    # Why the hell is this different than the first time we wrote to the file
    # and locked it again?
    # Also: after opening, the file is empty.

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())
Beispiel #48
0
def test_formatter_no_mapping_arg():
    fmt = au.Formatter({})
    assert_raises(ValueError, fmt.format, "{0}", "not a mapping")
Beispiel #49
0
def test_run_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    adjusted = repo.is_managed_branch()

    # --assume-ready=inputs

    (repo.pathobj / "f1").write_text("f1")
    ds.save()

    def cat_cmd(fname):
        return [
            sys.executable, "-c",
            "import sys; print(open(sys.argv[-1]).read())", fname
        ]

    assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]),
                      action="get",
                      type="file")
    # Same thing, but without the get() call.
    assert_not_in_results(ds.run(cat_cmd("f1"),
                                 inputs=["f1"],
                                 assume_ready="inputs"),
                          action="get",
                          type="file")

    ds.drop("f1", check=False)
    if not adjusted:
        # If the input is not actually ready, the command will fail.
        with assert_raises(CommandError):
            ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs")

    # --assume-ready=outputs

    def unlink_and_write_cmd(fname):
        # This command doesn't care whether the output file is unlocked because
        # it removes it ahead of time anyway.
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); "
            "open(f, mode='w').write(str(sys.argv))", fname
        ]

    (repo.pathobj / "f2").write_text("f2")
    ds.save()

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"])
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    # Same thing, but without the unlock() call.
    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 assume_ready="outputs")
    assert_not_in_results(res, action="unlock", type="file")

    # --assume-ready=both

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"])
    assert_in_results(res, action="get", type="file")
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")

    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 inputs=["f2"],
                 assume_ready="both")
    assert_not_in_results(res, action="get", type="file")
    assert_not_in_results(res, action="unlock", type="file")
Beispiel #50
0
def test_formatter_no_idx_map():
    fmt = au.Formatter({})
    assert_raises(KeyError, fmt.format, "{0}", {"col0": "value0"})
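
These two formatter checks pin down a contract: the first positional argument
must be a mapping, and positional fields resolve through an index-to-column
map. A rough reimplementation of just that contract, not datalad's class:

import string

class IdxFormatter(string.Formatter):
    def __init__(self, idx_to_name):
        self.idx_to_name = idx_to_name

    def format(self, format_string, *args, **kwargs):
        if not args or not isinstance(args[0], dict):
            raise ValueError("first positional argument must be a mapping")
        return super().format(format_string, *args, **kwargs)

    def get_value(self, key, args, kwargs):
        if isinstance(key, int):
            # positional "{0}" resolves via the index-to-column map first;
            # an unmapped index raises KeyError, as asserted above
            return args[0][self.idx_to_name[key]]
        return super().get_value(key, args, kwargs)

fmt = IdxFormatter({0: "col0"})
assert fmt.format("{0}", {"col0": "value0"}) == "value0"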
Beispiel #51
0
def ask():
    assert_raises(ValueError, cfg.obtain, dummy, store=True)
Beispiel #52
0
def test_addurls_url_filename_fail(self, path):
    ds = Dataset(path).create(force=True)
    assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                  "{url}/nofilename/", "{_url0}/{_url_filename}")
Beispiel #53
0
def ask():
    assert_raises(ValueError, cfg.obtain, dummy)
Beispiel #54
0
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return len(ds.repo.get_revisions("git-annex"))

        n_annex_commits = get_annex_commit_counts()

        # Meanwhile also test that we can specify the path relative
        # to the top of the dataset, as paths are generally treated in
        # the Python API, and that this is the form saved in the commit
        # message record
        json_file = op.relpath(self.json_file, ds.path)

        ds.addurls(json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="(md5sum|size)")
        ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                      f"url_file='{json_file}'")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(op.join(ds.path, fname))

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        # Also ignore if on Windows as it seems as if a git-annex bug
        # leads to separate meta data commits:
        # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
        if not (dl_cfg.get('datalad.fake-dates') or on_windows):
            # We should have two new commits on the git-annex: one for the
            # added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(ds.addurls(self.json_file,
                                     "{url}",
                                     "{name}",
                                     ifexists="skip"),
                          action="addurls",
                          status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open(op.join(ds.path, "a"), "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError, ds.addurls, self.json_file,
                      "{url}", "{name}")
Beispiel #55
0
def ask():
    # fail on unknown dialog type
    assert_raises(ValueError, cfg.obtain, dummy, dialog_type='Rorschach_test')
Beispiel #56
0
def test_addurls_nonannex_repo(path):
    ds = Dataset(path).create(force=True, annex=False)
    with assert_raises(IncompleteResultsError) as raised:
        ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2")
    assert_in("not an annex repo", str(raised.exception))
Beispiel #57
0
def test_container_from_subdataset(ds_path, src_subds_path, local_file):

    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(name="first",
                             url=get_local_file_url(
                                 op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(subds.path, '.datalad', 'environments', 'first',
                          'image')
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # not installed subdataset doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a not present
    # subds:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset traversals
    # (i.e. we recurse with containers_list(..., result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)
Beispiel #58
0
def test_extract_wrong_input_type():
    assert_raises(ValueError, au._read, None, "invalid_input_type")
Beispiel #59
0
def test_diff(path, norepo):
    with chpwd(norepo):
        assert_raises(NoDatasetFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(ds.diff(fr='WTF',
                                on_failure='ignore',
                                result_renderer=None),
                        1,
                        status='impossible',
                        message="Git reference 'WTF' invalid")
    # no diff
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)
    assert_result_count(
        _dirty_results(ds.diff(fr='HEAD', result_renderer=None)), 0)
    # bogus path makes no difference
    assert_result_count(
        _dirty_results(ds.diff(path='THIS', fr='HEAD', result_renderer=None)),
        0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)

    if ds.repo.is_managed_branch():
        fr_base = DEFAULT_BRANCH
        to = DEFAULT_BRANCH
    else:
        fr_base = "HEAD"
        to = None

    res = _dirty_results(
        ds.diff(fr=fr_base + '~1', to=to, result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(_dirty_results(
            diff(fr=fr_base + '~1', to=to, result_renderer=None)),
                            1,
                            action='diff',
                            path=op.join(ds.path, 'new'),
                            state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(ds.diff(result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff(path='otherpath', result_renderer=None), 0)
    # giving the right path must work though
    assert_result_count(ds.diff(path='new', result_renderer=None),
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(ds.diff(result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        state='untracked',
                        type='directory',
                        path=op.join(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(ds.diff(untracked='all', result_renderer=None),
                        2,
                        state='untracked',
                        type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere', result_renderer=None), 0)
    # perfect match and anything underneath will do
    assert_result_count(ds.diff(path='deep', result_renderer=None),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep'),
                        type='directory')
    assert_result_count(ds.diff(path='deep', result_renderer=None),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep'))
    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(ds.diff(result_renderer=None),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep', 'down'),
                        type='file')
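
_dirty_results is defined elsewhere in the test module; judging from its use
above it filters diff results down to entries reporting an actual change. A
plausible stand-in:

def _dirty_results_sketch(results):
    # keep only entries that report an actual change
    return [r for r in results if r.get('state') not in ('clean', None)]

assert _dirty_results_sketch(
    [{'state': 'clean'}, {'state': 'added'}]) == [{'state': 'added'}]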
Beispiel #60
0
def test_implicit_install(src, dst):

    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError,
                  ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"),
                     get_data=False,
                     result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
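
The implicit part of the install (materializing 'sub' on the way to
'sub/subsub') boils down to walking the requested path's ancestors; computing
the candidate datasets is straightforward. An illustration, not datalad's
actual resolver:

from pathlib import PurePosixPath

def intermediate_datasets(relpath):
    # every prefix of the requested path is a candidate dataset to install
    parts = PurePosixPath(relpath).parts
    return [str(PurePosixPath(*parts[:i])) for i in range(1, len(parts) + 1)]

assert intermediate_datasets("sub/subsub") == ["sub", "sub/subsub"]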