Example #1
def test_add_readme(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    assert_status('ok', ds.add_readme())
    # should use default name
    eq_(
        open(opj(path, 'README.md')).read(),
        """\
# Dataset "demo_ds"

this is for play

### Authors

- Betty
- Tom

### License

PDDL

## General information

This is a DataLad dataset (id: {id}).

For more information on DataLad and on how to work with its datasets,
see the DataLad documentation at: http://docs.datalad.org
""".format(
    id=ds.id))

    # should skip on re-run
    assert_status('notneeded', ds.add_readme())
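
# Note: the snippets in this listing are taken from the DataLad test suite and
# omit the surrounding imports and fixture decorators. The sketch below is a
# hypothetical illustration of that scaffolding; module paths and helper names
# vary between DataLad versions, so treat it as an assumption rather than the
# actual test module header.
from datalad.api import Dataset
from datalad.tests.utils import (  # datalad.tests.utils_pytest in newer releases
    assert_status,
    ok_clean_git,
    with_tempfile,
)


@with_tempfile(mkdir=True)
def test_scaffold_demo(path):
    # `path` is a temporary directory injected by the decorator, analogous to
    # the `path`/`src`/`tdir` arguments of the tests in this listing
    ds = Dataset(path).create()
    (ds.pathobj / 'README.md').write_text('demo')
    assert_status('ok', ds.save(message='add README'))
    ok_clean_git(ds.path)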
Example #2
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible', publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without a dataset is now supported, as long as the dataset
    # can be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it cannot be identified, then it should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
Example #3
def test_here(path):
    # few smoke tests regarding the 'here' sibling
    ds = create(path)
    res = ds.siblings(
        'query',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_(ds.repo.uuid, here['annex-uuid'])
    assert_in('annex-description', here)
    assert_in('annex-bare', here)
    assert_in('available_local_disk_space', here)

    # set a description
    res = ds.siblings(
        'configure',
        name='here',
        description='very special',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_('very special', here['annex-description'])
Example #4
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #5
def test_install_list(path, top_path):

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
Example #6
def test_drop_nocrash_absent_subds(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    parent.uninstall('sub')
    ok_clean_git(parent.path)
    with chpwd(path):
        assert_status('notneeded', drop('.', recursive=True))
Example #7
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #8
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #9
def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
Example #10
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
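    # `target` is not defined in this snippet; in the originating test module it
    # is presumably a module-level dict mapping expected audio metadata fields
    # (e.g. bitrate, date) to their expected values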
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the extractor considers it worthless
    # for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Example #11
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed, and not a
    # new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
Example #12
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
Example #13
def test_failed_clone(dspath):
    ds = create(dspath)
    res = ds.clone("http://nonexistingreallyanything.datalad.org/bla", "sub",
                   on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
Example #14
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #15
def test_uninstall_invalid(path):
    ds = Dataset(path).create(force=True)
    for method in (uninstall, remove, drop):
        assert_raises(InsufficientArgumentsError, method)
        # refuse to touch stuff outside the dataset
        assert_status('error', method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error', method(dataset=ds, path='../madeupnonexist', on_failure='ignore'))
Example #16
def test_safetynet(path):
    ds = Dataset(path).create()
    os.makedirs(opj(ds.path, 'deep', 'down'))
    for p in (ds.path, opj(ds.path, 'deep'), opj(ds.path, 'deep', 'down')):
        with chpwd(p):
            # will never remove PWD, or anything outside the dataset
            for target in (ds.path, os.curdir, os.pardir, opj(os.pardir, os.pardir)):
                assert_status(
                    ('error', 'impossible'),
                    uninstall(path=target, on_failure='ignore'))
Example #17
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception ATM... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test, since in the above test it would not puke ATM but would
    # just not even try to copy, since it assumes that the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #18
def test_get_single_file(path):

    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    result = ds.get("test-annex.dat")
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], opj(ds.path, 'test-annex.dat'))
    eq_(result[0]['annexkey'], ds.repo.get_file_key('test-annex.dat'))
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
Example #19
def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets', return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))
Example #20
def test_update_strategy(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'impossible',
            ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'ok',
            ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))
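
# `_get_contained_objs` and `_get_referenced_objs` are helpers defined in the
# test module this example was taken from. The sketch below is an assumption of
# what the "contained objects" counter might look like: it counts aggregated
# metadata blobs tracked under .datalad/metadata/objects/ (the location pinned
# by the .gitattributes rule written at the top of the test). The real helpers
# may differ.
def _get_contained_objs_sketch(ds):
    from os.path import join as opj
    prefix = opj('.datalad', 'metadata', 'objects', '')
    # every file in the index below the metadata object store counts as one blob
    return set(f for f in ds.repo.get_indexed_files() if f.startswith(prefix))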
Example #21
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
Example #22
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, we will just smoke test using the current
    # user's group
    ds.create_sibling(target_url, name=remote, shared='group', group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we have not yet used an option to record publishing to that sibling
    # by default (e.g. --set-upstream), running just ds.publish should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    ds.publish(to=remote)  # should be ok, non-recursive; BUT it (git or us?) would
                           # create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1,
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote), 'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #23
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #24
def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # really sleep, so that if they ever stop using time.time we would notice
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad, so this test is not
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)
    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
Example #25
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
    assert_result_count(res, 1, action='diff', state='untracked', path=opj(sub.path, 'twofile'), type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion, must not be different from the call
    # above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')

    # one further back brings in the modified subdataset, and the added file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='added', path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='added', path=opj(sub.path, 'twofile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
Example #26
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something that is not a dataset
        # (the target directory needs to exist for this):
        os.makedirs(opj(tdir, 'sub'))
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme.exception))
Example #27
def test_remove_subds(path):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1,
        path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1,
        path=op.join(ds.path, 'sub'),
        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)
Example #28
def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
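    # as in the audio example above, `target` is not shown here; it is presumably
    # a module-level dict of expected EXIF fields and their values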
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)
Example #29
def test_multiway_merge(path):
    # prepare ds with two siblings, but no tracking branch
    ds = Dataset(op.join(path, 'ds_orig')).create()
    r1 = AnnexRepo(path=op.join(path, 'ds_r1'), git_opts={'bare': True})
    r2 = GitRepo(path=op.join(path, 'ds_r2'), git_opts={'bare': True})
    ds.siblings(action='add', name='r1', url=r1.path)
    ds.siblings(action='add', name='r2', url=r2.path)
    assert_status('ok', ds.publish(to='r1'))
    assert_status('ok', ds.publish(to='r2'))
    # just a fetch should be no issue
    assert_status('ok', ds.update())
    # ATM we do not support multi-way merges
    assert_status('impossible', ds.update(merge=True, on_failure='ignore'))
Example #30
def test_failon_nodrop(path):
    # test to make sure that we do not wipe out data when checks are enabled
    # despite the general error behavior mode
    ds = Dataset(path).create()
    # we play with a subdataset to bypass the tests that prevent the removal
    # of top-level datasets
    sub = ds.create('sub')
    create_tree(sub.path, {'test': 'content'})
    ds.save(opj('sub', 'test'))
    ok_clean_git(ds.path)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # we put one file into the dataset's annex, no redundant copies
    # neither uninstall nor remove should work
    res = ds.uninstall('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # same with remove
    res = ds.remove('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
Example #31
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        assert_status('error', [result[0]])
        assert_in_results(result, status='ok', path=sub2.path)

        cml.assert_logged(
            msg="target path already exists and not empty",
            regex=False,
            level='ERROR')
Example #32
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path):
        # runs nothing, does nothing
        assert_result_count(ds.run(), 0)
        eq_(last_state, ds.repo.get_hexsha())
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.repo.head.commit.message
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        assert_status('notneeded', res)
        eq_(last_state, ds.repo.get_hexsha())

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath):
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_status('ok', res)
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty2'), type='file')
Example #33
def test_rerun_just_one_commit(path):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status('impossible',
                      ds.rerun(branch="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    ds.repo.checkout("orph", options=["--orphan"])
    ds.repo.call_git(["reset", "--hard"])
    ds.repo.config.reload()

    ds.run('echo static-content > static')
    eq_(len(ds.repo.get_revisions("HEAD")), 1)
    assert_raises(IncompleteResultsError, ds.rerun)
    assert_raises(IncompleteResultsError, ds.rerun, since="", onto="")

    # --script propagates the error.
    with swallow_outputs():
        assert_raises(IncompleteResultsError,
                      ds.rerun, since="", onto="", script="-")
    # --dry-run propagates the error.
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="", onto="",
                  report=True, return_type="list")
Example #34
def test_update_unborn_master(path):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available.  Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH,
                         symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible",
                  ds_b.update(merge=True, on_failure="ignore"))

    ds_b.repo.checkout("other")
    assert_status("ok",
                  ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())
Example #35
def test_uninstall_invalid(path):
    ds = Dataset(path).create(force=True)
    # no longer a uniform API for uninstall, drop, and remove
    for method in (uninstall,): #  remove, drop):
        with chpwd(ds.path):
            assert_status('error', method(on_failure='ignore'))
        # refuse to touch stuff outside the dataset
        assert_status('error', method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error', method(dataset=ds, path='../madeupnonexist', on_failure='ignore'))
Example #36
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res,
                            1,
                            action='add',
                            path=opj(ds.path, 'empty'),
                            type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.repo.head.commit.message
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        assert_status('notneeded', res)
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_status('notneeded', res)

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_status('ok', res)
        assert_result_count(res,
                            1,
                            action='add',
                            path=opj(ds.path, 'empty2'),
                            type='file')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
Example #37
def test_backup_archive(path, objtree, archivremote):
    """Similar to test_archive_layout(), but not focused on
    compatibility with the directory-type special remote. Instead,
    it tests building a second RIA remote from an existing one, e.g.
    for backup purposes.
    """
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')

    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    targetpath.mkdir(parents=True)
    subprocess.run(
        ['7z', 'u', str(targetpath / 'archive.7z'), '.'],
        cwd=str(Path(objtree) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'),
    )
    initexternalremote(ds.repo,
                       '7z',
                       'ria',
                       config={'base-path': archivremote})
    # wipe out the initial RIA remote (just for testing if the upcoming
    # one can fully take over)
    shutil.rmtree(objtree)
    # fsck to make git-annex aware of the loss
    assert_status('error', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='archive', fast=True)
    ])
    # now only available "here"
    eq_(len(ds.repo.whereis('one.txt')), 1)

    # make the backup archive known
    initexternalremote(ds.repo,
                       'backup',
                       'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='backup', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])
Example #38
def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        res = add(path="some", on_failure='ignore')
        assert_status('impossible', res)
    ds = Dataset(opj(path, 'ds'))
    ds.create()
    # non-existing path outside
    assert_status('impossible', ds.add(opj(path, 'outside'), on_failure='ignore'))
    # existing path outside
    with open(opj(path, 'outside'), 'w') as f:
        f.write('doesnt matter')
    assert_status('impossible', ds.add(opj(path, 'outside'), on_failure='ignore'))
Example #39
def test_reaggregate(path):
    ds, subds1, subds2 = _prep_partial_update_ds(path)
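    # `_prep_partial_update_ds` is a helper from the originating test module;
    # judging by the unpacking, it prepares a superdataset with two subdatasets
    # (at least one of them under the 'down/' subdirectory queried further below)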
    # the actual job
    assert_status('ok', ds.meta_aggregate(recursive=True))
    # nothing without a modification
    assert_status('notneeded', ds.meta_aggregate(recursive=True))
    # modify subds1
    (subds1.pathobj / 'new').write_text(text_type('content'))
    ds.save(recursive=True)
    # go for a full re-aggregation, it should do the right thing
    # and only re-extract from subds1 and the root dataset
    # as these are the only ones with changes
    res = ds.meta_aggregate(recursive=True)
    # we should see three deletions, two for the replaced metadata blobs
    # of the modified subdataset, and one for the dataset metadata of the super
    assert_result_count(res, 3, action='delete')
    # four additions: two new blobs for the subdataset, one dataset
    # metadata blob for the root, due to a new modification date
    # and the aggregate catalog
    assert_result_count(res, 4, action='add')
    # partial reaggregation has tidied up everything nicely, so a
    # full aggregation does nothing
    good_state = ds.repo.get_hexsha()
    assert_status('notneeded', ds.meta_aggregate(recursive=True))
    # a given constraining path will also not trigger any further action
    eq_(good_state, ds.repo.get_hexsha())
    assert_status('notneeded', ds.meta_aggregate(path='down', recursive=True))
    eq_(good_state, ds.repo.get_hexsha())
    # but we can force extraction and get a selective update for this one
    # dataset only; here we are not pointing to a subdataset itself, but
    # recursing downwards from a subdirectory.  However, without an actual
    # dataset change, and with no change to an extractor's output, nothing
    # will change in the dataset
    ds.meta_aggregate(path='down', recursive=True, force='extraction')
    eq_(good_state, ds.repo.get_hexsha())
Example #40
def test_get_invalid_call(path, file_outside):

    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible', get(None, dataset=path, on_failure='ignore'))

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove indicator file that create has placed
    # in the dataset to make it possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh:  but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(res,
                      status='impossible',
                      message='path not associated with any dataset')
Example #41
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
Example #42
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(
        ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
Example #43
def test_rerun_empty_branch(path):
    GitRepo(path, create=True)
    ds = Dataset(path)
    assert_status("impossible", ds.rerun(on_failure="ignore"))
Example #44
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything is aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == ensure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
Example #45
def _test_version_check(host, dspath, store):

    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary choice.
    # We need just something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError,
                  ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
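    # The config key appears to follow the pattern
    # 'annex.ora-remote.<name>.force-write', with '<name>' being the special
    # remote's name ('store' in this test); setting it to "true" lets writes
    # proceed despite the version mismatch reported above.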
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')
Example #46
0
def test_push_recursive(origin_path, src_path, dst_top, dst_sub,
                        dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out recursively
    # running on a clone should make the test scenario differ more from
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get('.',
                                      recursive=True,
                                      get_data=False,
                                      result_xfm='datasets')

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec='refs/heads/master:refs/heads/master',
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # all corresponding branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(res,
                          status='ok',
                          refspec="refs/heads/master:refs/heads/master")
    assert_in_results(res,
                      status='notneeded',
                      refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is a still '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec='refs/heads/master:refs/heads/master')
Example #47
0
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(dest.remove('subm 1'),
                        1,
                        status='ok',
                        action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2,
                            status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        status='ok',
                        type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.add(opj('2', 'load.dat'),
               message="saving changes within subm2",
               recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        status='ok',
                        type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
Example #48
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=sub.path,
                        type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded'
        }
    })
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=opj(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=opj(sub.path, 'twofile'),
                        type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')

    # now the exact same thing with recursion, must not be different from the call
    # above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')

    # one further back brings in the modified subdataset, and the added file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(sub.path, 'twofile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
Example #49
0
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/<default branch>
    assert_in("update.txt", dest.repo.get_files("origin/" + DEFAULT_BRANCH))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3,
                        status='ok',
                        type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok', path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod) to
    # operate on
    with chpwd(dest.path):
        # In 0.11.x it would be a single result, since the "pwd" dataset is not
        # considered, and the path would be relative (as specified).
        # In 0.12.0 it includes the implicit pwd dataset, and paths are absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update,
                              path=p,
                              action='update',
                              status='ok',
                              type='dataset')

        # and with merge we would also try to save (but there would be no changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # 2 of "updates" really.
        assert_in_results(res_merge,
                          action='update',
                          status='ok',
                          type='dataset')
        assert_in_results(res_merge,
                          action='save',
                          status='notneeded',
                          type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(dest.remove('subm 1'),
                        1,
                        status='ok',
                        action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2,
                            status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
Example #50
0
def test_unlock(path):

    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in direct mode there is no unlock:
    if ds.repo.is_direct_mode():
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('notneeded', res)

    # in V6 we can unlock even if the file's content isn't present:
    elif ds.repo.config.getint("annex", "version") == 6:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we subsequently have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    if ds.repo.is_direct_mode():
        assert_status('notneeded', result)
    else:
        assert_in_results(result,
                          path=opj(ds.path, 'test-annex.dat'),
                          status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6 we need to explicitly re-lock it:
    if ds.repo.config.getint("annex", "version") == 6:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we subsequently have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    if ds.repo.is_direct_mode():
        assert_in_results(result,
                          path=opj(ds.path, 'test-annex.dat'),
                          status='notneeded')
    else:
        assert_in_results(result,
                          path=opj(ds.path, 'test-annex.dat'),
                          status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6 we need to explicitly re-lock it:
    if ds.repo.config.getint("annex", "version") == 6:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we subsequently have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat writeable in V6!
    # Why is this different from the first time we wrote to the file
    # and locked it again?
    # Also: after opening, the file is empty.

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())
Example #51
0
def test_diff(path, norepo):
    with chpwd(norepo):
        assert_status('impossible', diff(on_failure='ignore'))
    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    # reports stupid revision input
    assert_result_count(ds.diff(revision='WTF', on_failure='ignore'),
                        1,
                        status='impossible',
                        message="fatal: bad revision 'WTF'")
    assert_result_count(ds.diff(), 0)
    # no diff
    assert_result_count(ds.diff(), 0)
    assert_result_count(ds.diff(revision='HEAD'), 0)
    # bogus path makes no difference
    assert_result_count(ds.diff(path='THIS', revision='HEAD'), 0)
    # comparing to a previous state we should get a diff in most cases
    # for this test, let's not care what exactly it is -- will do later
    assert len(ds.diff(revision='HEAD~1')) > 0
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.add('.', to_git=True)
    ok_clean_git(ds.path)
    res = ds.diff(revision='HEAD~1')
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(diff(revision='HEAD~1'),
                            1,
                            action='diff',
                            path=opj(ds.path, 'new'),
                            state='added')
    # no diff against HEAD
    assert_result_count(ds.diff(), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    for diffy in (None, 'HEAD'):
        res = ds.diff(revision=diffy)
        assert_result_count(res, 1)
        assert_result_count(res,
                            1,
                            action='diff',
                            path=opj(ds.path, 'new'),
                            state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff('otherpath'), 0)
    # giving the right path must work though
    assert_result_count(ds.diff('new'),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    # stage changes
    ds.add('.', to_git=True, save=False)
    # no diff, because we staged the modification
    assert_result_count(ds.diff(), 0)
    # but we can get at it
    assert_result_count(ds.diff(staged=True),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    # OR
    assert_result_count(ds.diff(revision='HEAD'),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    ds.save()
    ok_clean_git(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = ds.diff()
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        state='untracked',
                        type='directory',
                        path=opj(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(ds.diff(report_untracked='all'),
                        2,
                        state='untracked',
                        type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep'),
                        type='directory')
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep'))
    # now we stage one of the two files in deep
    ds.add(opj('deep', 'down2'), to_git=True, save=False)
    # without any reference it will ignore the staged stuff and report the remaining
    # untracked file
    assert_result_count(ds.diff(),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep', 'down'),
                        type='file')
    res = ds.diff(staged=True)
    assert_result_count(res,
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep', 'down'),
                        type='file')
    assert_result_count(res,
                        1,
                        state='added',
                        path=opj(ds.path, 'deep', 'down2'),
                        type='file')
Example #52
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH,
                  result_renderer=None)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=sub.path,
                        type='dataset')
    # now recursive
    res = ds.diff(recursive=True,
                  fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH,
                  result_renderer=None)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded'
        }
    })
    res = ds.diff(recursive=True, untracked='all', result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)

    head_ref = DEFAULT_BRANCH if ds.repo.is_managed_branch() else 'HEAD'
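    # The fallback to DEFAULT_BRANCH is presumably needed because on managed
    # (e.g. git-annex adjusted) branches HEAD points at the adjusted branch
    # rather than the regular branch whose history is compared below.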

    # look at the last change, only one file was added
    res = ds.diff(fr=head_ref + '~1', to=head_ref, result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True,
                  fr=head_ref + '~1',
                  to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    if ds.repo.is_managed_branch():
        raise SkipTest(
            "Test assumption broken: https://github.com/datalad/datalad/issues/3818"
        )
    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True,
                  fr=head_ref + '~2',
                  to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
Example #53
0
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source,
                  to="target",
                  recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(res,
                        2,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make pushing into it possible
    sub2_target.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin, but origin has a different git-annex branch --
    # since we cloned from it, it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since. By default, since there are no changes, nothing gets pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because the original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and the change not saved in the
    # super-dataset, we would have made the remote dataset somewhat
    # inconsistent, had we had the entire hierarchy there.
    # But here, since the target datasets are independent -- it is ok

    # the file's link made it to the target, but its content was not transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(dataset=source,
                   to='target',
                   recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within
    # "subm 1", since if `--since=''` worked correctly, nothing has changed
    # there and it should not have even been touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
Example #54
0
def test_force_datatransfer(srcpath, dstpath):
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', force='no-datatransfer')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha('master'), target.get_hexsha('master'))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without forced no-transfer
    # we do not give since, so the non-transferred file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec='refs/heads/master:refs/heads/master')
    # but availability update
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(
        len(whereis_prior) < len(
            src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # doing it yet again will do nothing, because all is up to date
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded', src.push(to='target',
                                        force=None,
                                        since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='datatransfer')
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec='refs/heads/master:refs/heads/master')
    # no availability update
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target',
                   path='.',
                   force='datatransfer',
                   on_failure='ignore')
    assert_in_results(res,
                      status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')
Example #55
0
def _test_bare_git_version_2(host, dspath, store):
    # Similarly to test_bare_git_version_1, this should ensure a bare git repo
    # at the store location for a dataset doesn't conflict with the ORA remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 2 (mixed) upload via ORA and consumption via git should
    #       work. But not the other way around, since git-annex uses
    #       dirhashlower with bare repos.
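    #       The asymmetry is presumably because git-annex checks both
    #       hash-directory styles when reading keys from a git remote, but
    #       writes to a bare repo using dirhashlower only -- so keys uploaded
    #       by ORA under the mixed layout remain retrievable via git, while
    #       keys written via git end up where an ORA layout-2 remote does not
    #       look.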

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run([
        'git', 'clone', '--bare',
        quote_cmdlinearg(str(dspath)),
        quote_cmdlinearg(str(bare_repo_path))
    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 2 (dirhash mixed):
    create_ds_in_store(io, store, ds.id, '2', '1')

    # Now, let's have the bare repo as a git remote
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # and the ORA remote in addition:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # upload keys via ORA:
    ds.repo.copy_to('.', 'ora-remote')
    # bare-git doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # actually consumable via git remote:
    ds.repo.call_annex(['move', 'one.txt', '--from', 'bare-git'])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now, move back via git - shouldn't be consumable via ORA
    ds.repo.call_annex(['move', 'one.txt', '--to', 'bare-git'])
    # fsck to make availability known, but there's nothing from POV of ORA:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    assert_result_count(fsck_res, 1, status='ok')
    eq_(len(fsck_res), 2)
    eq_(len(ds.repo.whereis('one.txt')), 1)
Example #56
0
def test_dicom(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'dicom', where='dataset')
    copy(
        op.join(op.dirname(op.dirname(op.dirname(__file__))), 'tests', 'data',
                'files', 'dicom.dcm'), path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    # query for the file metadata
    res = ds.metadata('dicom.dcm')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['dicom']
    assert_in('@context', meta)
    # no point in testing ALL keys, but we got plenty
    assert (len(meta.keys()) > 70)
    eq_(meta['SeriesDate'], '20070205')
    # Actually a tricky one of the dcm.multival.MultiValue type
    # which we should extract as a list
    # https://github.com/datalad/datalad-neuroimaging/issues/49
    eq_(meta['ImageType'], ['ORIGINAL', 'PRIMARY', 'EPI', 'NONE'])
    # make sure we have PatientName -- this is not using a basic data type, but
    # dicom.valuerep.PersonName3 -- conversion should have handled that.
    # We can only test whether the key is there; the source dicom has an empty
    # string as value
    eq_(meta['PatientName'], '')

    # now ask for the dataset metadata, which should have both the unique props
    # and a list of imageseries (one in this case, but a list)
    res = ds.metadata(reporton='datasets')
    assert_result_count(res, 1)
    dsmeta = res[0]['metadata']['dicom']
    # same context
    assert_dict_equal(meta['@context'], dsmeta['@context'])
    meta.pop('@context')
    seriesmeta = dsmeta['Series']
    eq_(seriesmeta[0].pop('SeriesDirectory'), op.curdir)
    eq_(dsmeta['Series'], [meta])

    # for this artificial case pretty much the same info also comes out as
    # unique props, but wrapped in lists
    ucp = res[0]['metadata']["datalad_unique_content_properties"]['dicom']
    assert_dict_equal(
        {
            k: [v]
            for k, v in dsmeta['Series'][0].items()
            if k not in DicomExtractor._unique_exclude and k in ucp
        }, {
            k: v
            for k, v in ucp.items() if k not in DicomExtractor._unique_exclude
        })

    # buuuut, if we switch off file-based metadata storage
    ds.config.add('datalad.metadata.aggregate-content-dicom',
                  'false',
                  where='dataset')
    ds.aggregate_metadata()
    res = ds.metadata(reporton='datasets')

    if not datalad_extracts_annex_key:
        # the auto-uniquified bits are gone but the Series description stays
        assert_not_in("datalad_unique_content_properties", res[0]['metadata'])
    eq_(dsmeta['Series'], [meta])
Example #57
0
def _test_bare_git_version_1(host, dspath, store):
    # This test should take a dataset and create a bare repository at the remote
    # end from it.
    # Given that it is placed correctly within a tree of datasets, that remote
    # should then be usable as an ora-remote as well as a git-type
    # remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 1 (lower) upload and consumption should be
    #       interchangeable. It doesn't matter which remote is used for what
    #       direction.
    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run([
        'git', 'clone', '--bare',
        quote_cmdlinearg(str(dspath)),
        quote_cmdlinearg(str(bare_repo_path))
    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 1 (dirhash lower):
    create_ds_in_store(io, store, ds.id, '1', '1')

    # Now, let's have the bare repo as a git remote and use it with annex
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')

    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Now, add the ora remote:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from git-remote to local and see it not being available
    # via bare-git anymore.
    ds.repo.call_annex(['move', '--all', '--from=bare-git'])
    # ora-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # But after fsck it does:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, subdir/two\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)
    # and the other way around: upload via ora-remote and have it available via
    # git-remote:
    ds.repo.copy_to('.', 'ora-remote')
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
Example #58
0
def test_plugin_call(path, dspath):
    # make plugins
    create_tree(
        path,
        {
            'dlplugin_dummy.py': dummy_plugin,
            'dlplugin_nodocs.py': nodocs_plugin,
            'dlplugin_broken.py': broken_plugin,
        })
    fake_dummy_spec = {
        'dummy': {'file': opj(path, 'dlplugin_dummy.py')},
        'nodocs': {'file': opj(path, 'dlplugin_nodocs.py')},
        'broken': {'file': opj(path, 'dlplugin_broken.py')},
    }

    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        with swallow_outputs() as cmo:
            plugin(showplugininfo=True)
            # hyphen spacing depends on the longest plugin name!
            # sorted
            # summary list generation doesn't actually load plugins for speed,
            # hence broken is not known to be broken here
            eq_(cmo.out,
                "broken [no synopsis] ({})\ndummy  - real dummy ({})\nnodocs [no synopsis] ({})\n".format(
                    fake_dummy_spec['broken']['file'],
                    fake_dummy_spec['dummy']['file'],
                    fake_dummy_spec['nodocs']['file']))
        with swallow_outputs() as cmo:
            plugin(['dummy'], showpluginhelp=True)
            eq_(cmo.out.rstrip(), "Usage: dummy(dataset, noval, withval='test')\n\nmydocstring")
        with swallow_outputs() as cmo:
            plugin(['nodocs'], showpluginhelp=True)
            eq_(cmo.out.rstrip(), "Usage: nodocs()\n\nThis plugin has no documentation")
        # loading fails, no docs
        assert_raises(ValueError, plugin, ['broken'], showpluginhelp=True)

    # assume this most obscure plugin name is not used
    assert_raises(ValueError, plugin, '32sdfhvz984--^^')

    # broken plugin argument, must match Python keyword arg
    # specs
    assert_raises(ValueError, plugin, ['dummy', '1245'])

    def fake_is_installed(*args, **kwargs):
        return True
    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec), \
        patch('datalad.distribution.dataset.Dataset.is_installed', return_value=True):
        # does not trip over unsupported arguments; they get filtered out,
        # because we carry all kinds of stuff
        with swallow_logs(new_level=logging.WARNING) as cml:
            res = list(plugin(['dummy', 'noval=one', 'obscure=some']))
            assert_status('ok', res)
            cml.assert_logged(
                msg=".*Ignoring plugin argument\\(s\\).*obscure.*, not supported by plugin.*",
                regex=True, level='WARNING')
        # fails on missing positional arg
        assert_raises(TypeError, plugin, ['dummy'])
        # positional and kwargs actually make it into the plugin
        res = list(plugin(['dummy', 'noval=one', 'withval=two']))[0]
        eq_('one', res['args']['noval'])
        eq_('two', res['args']['withval'])
        # kwarg defaults are preserved
        res = list(plugin(['dummy', 'noval=one']))[0]
        eq_('test', res['args']['withval'])
        # repeated specification yields list input
        res = list(plugin(['dummy', 'noval=one', 'noval=two']))[0]
        eq_(['one', 'two'], res['args']['noval'])
        # can do the same thing  while bypassing argument parsing for calls
        # from within python, and even preserve native python dtypes
        res = list(plugin(['dummy', ('noval', 1), ('noval', 'two')]))[0]
        eq_([1, 'two'], res['args']['noval'])
        # and we can further simplify in this case by passing lists right
        # away
        res = list(plugin(['dummy', ('noval', [1, 'two'])]))[0]
        eq_([1, 'two'], res['args']['noval'])

    # dataset arg handling
    # run plugin that needs a dataset where there is none
    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        ds = None
        with chpwd(dspath):
            assert_raises(ValueError, plugin, ['dummy', 'noval=one'])
            # create a dataset here, fixes the error
            ds = create()
            res = list(plugin(['dummy', 'noval=one']))[0]
            # gives dataset instance
            eq_(ds, res['args']['dataset'])
        # now do it again, giving the dataset path
        # but careful, `dataset` is a proper argument
        res = list(plugin(['dummy', 'noval=one'], dataset=dspath))[0]
        eq_(ds, res['args']['dataset'])
        # however, if passed alongside the plugin's args it also works
        res = list(plugin(['dummy', 'dataset={}'.format(dspath), 'noval=one']))[0]
        eq_(ds, res['args']['dataset'])
        # but if both are given, the proper argument takes precedence
        # (and the bogus 'rubbish' dataset then triggers the error)
        assert_raises(ValueError, plugin, ['dummy', 'dataset={}'.format(dspath), 'noval=one'],
                      dataset='rubbish')
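
# The dummy-plugin assertions above rely on the plugin runner folding repeated
# 'key=value' items (or already-parsed (key, value) tuples) into lists before
# handing them to the plugin.  A purely illustrative sketch of that folding --
# a hypothetical helper, not DataLad's actual parsing code -- could look like:
def _fold_plugin_args(items):
    """Turn e.g. ['noval=one', ('noval', 2)] into {'noval': ['one', 2]}."""
    kwargs = {}
    for item in items:
        # accept both the 'key=value' string form and (key, value) tuples
        key, value = item.split('=', 1) if isinstance(item, str) else item
        if key in kwargs:
            # a repeated key accumulates its values into a list
            prev = kwargs[key]
            kwargs[key] = (prev if isinstance(prev, list) else [prev]) + [value]
        else:
            kwargs[key] = value
    return kwargs

# e.g. _fold_plugin_args(['noval=one', 'noval=two']) == {'noval': ['one', 'two']}
# and  _fold_plugin_args([('noval', [1, 'two'])])   == {'noval': [1, 'two']}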
Example #59
0
def test_get_recurse_subdatasets(src, path):

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')

    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'],
                            get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')

    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2}

    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    assert_repo_status(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    #      imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # the subdataset was already present
    assert_in_results(result,
                      type='dataset',
                      path=subds1.path,
                      status='notneeded')
    # we got the file
    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)

    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)

    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True,
                    result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)

    eq_(
        set([
            item.get('path')[len(ds.path) + 1:] for item in result
            if item['type'] == 'file'
        ]), annexed_files)
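    # (the '[len(ds.path) + 1:]' slice strips the leading dataset path plus
    #  its trailing path separator, leaving dataset-relative names to compare)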
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)

    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # no duplicate reporting on subdataset install and annex-get of its
    # directory
    eq_(len(result), 1)
    assert_result_count(result,
                        1,
                        path=opj(ds.path, 'test-annex.dat'),
                        status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
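
# assert_in_results(), as used above, checks that at least one result record
# carries all of the given properties.  A minimal, purely illustrative
# stand-in (a hypothetical helper, not DataLad's actual test utility):
def _assert_in_results(results, **kwargs):
    """Assert that some result dict matches all given key/value pairs."""
    assert any(
        all(res.get(k) == v for k, v in kwargs.items())
        for res in results
    ), "no result matched %r" % (kwargs,)

# e.g. _assert_in_results(result, type='dataset', status='notneeded')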
Example #60
0
def check_push(annex, src_path, dst_path):
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/master', orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res,
        status='impossible',
        message='No push target given, and none could be auto-detected, '
        'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())
    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      target='target',
                      refspec='refs/heads/master:refs/heads/master',
                      operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_("master")),
        list(src_repo.get_branch_commits_("master")))

    # configure a default merge/upstream target
    src.config.set('branch.master.remote', 'target', where='local')
    src.config.set('branch.master.merge', 'master', where='local')
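    # (equivalent to plain `git config --local branch.master.remote target`
    #  and `git config --local branch.master.merge master`, i.e. a regular
    #  upstream configuration for the master branch)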

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_("master")),
        list(src_repo.get_branch_commits_("master")))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    res = src.push(to='target', since="HEAD~2", jobs=2)
    assert_in_results(
        res,
        action='publish',
        status='ok',
        target='target',
        refspec='refs/heads/master:refs/heads/master',
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(res,
                          action='copy',
                          status='ok',
                          path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_("master")),
        list(src_repo.get_branch_commits_("master")))
    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above)

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_result_count(res, 1)
        assert_in_results(res,
                          action='publish',
                          status='error',
                          target='target',
                          refspec='refs/heads/master:refs/heads/master',
                          operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(res,
                          action='publish',
                          status='ok',
                          target='target',
                          refspec='refs/heads/master:refs/heads/master',
                          operations=['forced-update'])
        eq_(list(target.get_branch_commits_("master")),
            list(src_repo.get_branch_commits_("master")))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/master'
    eq_(orig_branches, src_repo.get_branches())
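
# check_push() above is a helper parametrized over `annex`; concrete test
# functions would invoke it once per repository flavor.  A hypothetical,
# self-contained wrapper using only the standard library (the real suite
# would more likely rely on DataLad's own tempfile decorators) might look
# like this:
import tempfile

def test_push_plain_git():
    # plain git dataset, no annex
    with tempfile.TemporaryDirectory() as src_path, \
            tempfile.TemporaryDirectory() as dst_path:
        check_push(False, src_path, dst_path)

def test_push_annex():
    # annex-enabled dataset
    with tempfile.TemporaryDirectory() as src_path, \
            tempfile.TemporaryDirectory() as dst_path:
        check_push(True, src_path, dst_path)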