Example #1
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the extractor considers it worthless
    # for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Example #2
def test_diff_nonexistent_ref_unicode(path):
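    # diff() against a non-existent (unicode) revision must not crash; with
    # on_failure="ignore" it yields a single 'impossible' result instead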
    ds = Dataset(path).create()
    assert_result_count(
        ds.diff(fr="HEAD", to=u"β", on_failure="ignore"),
        1,
        path=ds.path,
        status="impossible")
Example #3
def test_add_files(path):
    ds = Dataset(path).create(force=True)

    test_list_1 = ['test_annex.txt']
    test_list_2 = ['test.txt']
    test_list_3 = ['test1.dat', 'test2.dat']
    test_list_4 = [op.join('dir', 'testindir'),
                   op.join('dir', OBSCURE_FILENAME)]

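    # each entry pairs the path argument(s) passed to save() with the to_git
    # flag that decides whether content goes to git or to the annex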
    for arg in [(test_list_1[0], False),
                (test_list_2[0], True),
                (test_list_3, False),
                (test_list_4, False)]:
        # special case 4: give the dir:
        if arg[0] == test_list_4:
            result = ds.save('dir', to_git=arg[1])
            status = ds.repo.annexstatus(['dir'])
        else:
            result = ds.save(arg[0], to_git=arg[1])
            for a in assure_list(arg[0]):
                assert_result_count(result, 1, path=text_type(ds.pathobj / a))
            status = ds.repo.get_content_annexinfo(
                ut.Path(p) for p in assure_list(arg[0]))
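        # paths saved with to_git=True end up in git (no annex key); otherwise they are annexed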
        for f, p in iteritems(status):
            if arg[1]:
                assert p.get('key', None) is None, f
            else:
                assert p.get('key', None) is not None, f
Example #4
def test_rerun_just_one_commit(path):
    ds = Dataset(path).create()

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.repo.repo.git.reset("--hard")
    ds.repo.config.reload()

    ds.run('echo static-content > static')
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)

    # Rerunning with just one commit doesn't raise an error ...
    ds.rerun()
    # ... but we're still at one commit because the content didn't
    # change.
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)

    # We abort rather than trying to do anything when --onto='' and
    # --since='' are given together and the first commit contains a
    # run command.
    ds.repo.commit(msg="empty", options=["--allow-empty"])
    assert_raises(IncompleteResultsError, ds.rerun, since="", onto="")

    # --script propagates the error.
    with swallow_outputs():
        assert_raises(IncompleteResultsError,
                      ds.rerun, since="", onto="", script="-")
    # --dry-run propagates the error.
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="", onto="",
                  report=True, return_type="list")
Example #5
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #6
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible' status for both invalid args
    # but all the other tasks have been accomplished
    for skipped, msg in [(opj(ds.path, 'not_existing'), "path does not exist"),
                         (path_outside, "path not associated with any dataset")]:
        assert_result_count(
            result, 1, status='impossible', message=msg, path=skipped)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # the return of get() is always a list by default, even if just one thing was gotten;
    # in this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
Example #7
def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs a SSH URL
        assert_raises(InsufficientArgumentsError, create_sibling, '')
        assert_raises(ValueError, create_sibling, 'http://ignore.me')
        # needs an actual dataset
        assert_raises(
            ValueError,
            create_sibling, 'localhost:/tmp/somewhere', dataset='/nothere')
    # pre-configure a bogus remote
    ds = Dataset(path).create()
    ds.repo.add_remote('bogus', 'http://bogus.url.com')
    # fails to reconfigure by default with a generated name
    # and also when given an existing name
    for res in (ds.create_sibling('bogus:/tmp/somewhere', on_failure='ignore'),
                ds.create_sibling('localhost:/tmp/somewhere', name='bogus', on_failure='ignore')):
        assert_result_count(
            res, 1,
            status='error',
            message=(
                "sibling '%s' already configured (specify alternative name, or force reconfiguration via --existing",
                'bogus'))
Example #8
def test_get_mixed_hierarchy(src, path):

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #9
def test_our_metadataset_search(tdir):
    # TODO re-enable when a dataset with new aggregated metadata is
    # available at some public location
    raise SkipTest
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    #ds = install(
    #    path=tdir,
    #    # TODO re-enable test when /// metadata actually conforms to the new metadata
    #    #source="///",
    #    source="smaug:/mnt/btrfs/datasets-meta6-4/datalad/crawl",
    #    result_xfm='datasets', return_type='item-or-list')
    assert list(ds.search('haxby'))
    assert_result_count(
        ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84', mode='textblob'),
        1,
        type='dataset',
        path=opj(ds.path, 'crcns', 'pfc-2'))

    # there is a problem with argparse not decoding into utf8 in PY2
    from datalad.cmdline.tests.test_main import run_main
    # TODO: make it into an independent lean test
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors; need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)
Example #10
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible', publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
Example #11
def test_kill(path):
    # nested datasets with load
    ds = Dataset(path).create()
    testfile = opj(ds.path, "file.dat")
    with open(testfile, 'w') as f:
        f.write("load")
    ds.save("file.dat")
    subds = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    ok_clean_git(ds.path)

    # and we fail to remove since content can't be dropped
    res = ds.remove(on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error', path=testfile)
    # Following two assertions on message are relying on the actual error.
    # We have a second result with status 'impossible' for the ds, that we need
    # to filter out for those assertions:
    err_result = [r for r in res if r['status'] == 'error'][0]
    assert_result_values_cond(
        [err_result], 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary copies" in x
    )
    eq_(ds.remove(recursive=True, check=False, result_xfm='datasets'),
        [subds, ds])
    ok_(not exists(path))
Example #12
def test_uninstall_git_file(path):
    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(exists(opj(path, 'INFO.txt')))
    ok_file_under_git(ds.repo.path, 'INFO.txt')

    # drop file in Git in an annex repo
    # regardless of the type of repo this is 'notneeded'...
    # it is less about education than about "can we
    # get the content back?", and for a file in Git we can
    assert_result_count(
        ds.drop(path='INFO.txt'),
        1,
        status='notneeded',
        message="no annex'ed content")

    res = ds.uninstall(path="INFO.txt", on_failure='ignore')
    assert_result_count(
        res, 1,
        status='impossible',
        message='can only uninstall datasets (consider the `drop` command)')

    # remove the file:
    res = ds.remove(path='INFO.txt', result_xfm='paths',
                    result_filter=lambda x: x['action'] == 'remove')
    assert_raises(AssertionError, ok_file_under_git, ds.repo.path, 'INFO.txt')
    ok_(not exists(opj(path, 'INFO.txt')))
    eq_(res, ['INFO.txt'])
Example #13
def test_clone_isnot_recursive(src, path_nr, path_r):
    ds = clone(src, path_nr, result_xfm='datasets', return_type='item-or-list')
    ok_(ds.is_installed())
    # check nothing is unintentionally installed
    subdss = ds.subdatasets(recursive=True)
    assert_result_count(subdss, len(subdss), state='absent')
    # this also means, subdatasets to be listed as not fulfilled:
    eq_(set(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths')),
        {'subm 1', '2'})
Example #14
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in an entry being added for that repo into uuid.log on the remote
    # end, since git-annex then finally senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
Example #15
def test_update_fetch_all(src, remote_1, remote_2):
    rmt1 = AnnexRepo.clone(src, remote_1)
    rmt2 = AnnexRepo.clone(src, remote_2)

    ds = Dataset(src)
    ds.siblings('add', name="sibling_1", url=remote_1)
    ds.siblings('add', name="sibling_2", url=remote_2)

    # modify the remotes:
    with open(opj(remote_1, "first.txt"), "w") as f:
        f.write("some file load")
    rmt1.add("first.txt")
    rmt1.commit()
    # TODO: Modify an already present file!

    with open(opj(remote_2, "second.txt"), "w") as f:
        f.write("different file load")
    rmt2.add("second.txt", git=True)
    rmt2.commit(msg="Add file to git.")

    # Let's init some special remote which we couldn't really update/fetch
    if not os.environ.get('DATALAD_TESTS_DATALADREMOTE'):
        ds.repo.init_remote(
            'datalad',
            ['encryption=none', 'type=external', 'externaltype=datalad'])
    # fetch all remotes
    assert_result_count(
        ds.update(), 1, status='ok', type='dataset')

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/master"))
    assert_in("second.txt", ds.repo.get_files("sibling_2/master"))

    # no merge strategy for multiple remotes yet:
    # more clever now, there is a tracking branch that provides a remote
    #assert_raises(NotImplementedError, ds.update, merge=True)

    # merge a certain remote:
    assert_result_count(
        ds.update(
            sibling='sibling_1', merge=True), 1, status='ok', type='dataset')

    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.get_files(ds.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    ds.repo.get_file_key("first.txt")  # raises if unknown
    eq_([False], ds.repo.file_has_content(["first.txt"]))
Example #16
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path,
                        strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path,
                        strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"],
        "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in >c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path),
                  script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path),
                  script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())
Example #17
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets', force=True)
    eq_(res, [source])
Example #18
def test_get_single_file(path):

    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
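    # getting a single annexed file must yield exactly one 'ok' result
    # reporting the file's path and annex key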
    result = ds.get("test-annex.dat")
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], opj(ds.path, 'test-annex.dat'))
    eq_(result[0]['annexkey'], ds.repo.get_file_key('test-annex.dat'))
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
Example #19
def test_update_git_smoke(src_path, dst_path):
    # Apparently this was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True), 1,
        status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #20
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert(knows_annex(ds.path))
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #21
def test_update_strategy(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'impossible',
            ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'ok',
            ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))
Example #22
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
Example #23
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install(
            'ds1', source=src,
            result_xfm='datasets', return_type='item-or-list')
        ds2 = install(
            'ds2', source=src,
            result_xfm='datasets', return_type='item-or-list')
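        # a single get() call with paths from two different datasets must
        # resolve each path to its containing dataset automatically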
        results = get([opj('ds1', 'test-annex.dat')] + glob(opj('ds2', '*.dat')))
        # each ds has one file
        assert_result_count(results, 2, type='file', action='get', status='ok')
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
Example #24
def test_here(path):
    # few smoke tests regarding the 'here' sibling
    ds = create(path)
    res = ds.siblings(
        'query',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_(ds.repo.uuid, here['annex-uuid'])
    assert_in('annex-description', here)
    assert_in('annex-bare', here)
    assert_in('available_local_disk_space', here)

    # set a description
    res = ds.siblings(
        'configure',
        name='here',
        description='very special',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_('very special', here['annex-description'])
Example #25
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = relpath(ores['path'], ores['refds'])
        cres = compds.metadata(
            rpath,
            reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            for i in ('dsid', ):
                eq_(ores[i], cres[i])
Example #26
def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so that if they stop using time.time - we would know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)
    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
Example #27
def test_status_nods(path, otherpath):
    ds = Dataset(path).create()
    assert_result_count(
        ds.status(path=otherpath, on_failure='ignore'),
        1,
        status='error',
        message='path not underneath this dataset')
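    # if the outside path belongs to another dataset, the error result
    # points at that dataset rather than at the path itself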
    otherds = Dataset(otherpath).create()
    assert_result_count(
        ds.status(path=otherpath, on_failure='ignore'),
        1,
        path=otherds.path,
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference dataset %s: %s',
            ds, [])
        )
Example #28
def test_gh1597(path):
    ds = Dataset(path).create()
    sub = ds.create('sub', save=False)
    # only staged at this point, but known, and not annexed
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    ok_clean_git(ds.path, index_modified=['sub'])
    # now modify low-level
    with open(opj(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    ok_clean_git(ds.path, index_modified=['.gitmodules', 'sub'])
    ds.add('.gitmodules')
    # must not come under annex management
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
Example #29
def test_smth_about_not_supported(p1, p2):
    source = Dataset(p1).create()
    from datalad.support.network import PathRI
    source.create_sibling(
        'ssh://localhost' + PathRI(p2).posixpath,
        name='target1')
    # source.publish(to='target1')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(
            publish(to='target1', since='HEAD^^', on_failure='ignore'),
            1,
            status='impossible',
            message="fatal: bad revision 'HEAD^^'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
Example #30
def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
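    # with the exif extractor enabled, bring in a test image for it to parse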
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)
Example #31
def test_within_ds_file_search(path):
    try:
        import nibabel
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    ds.config.add('datalad.metadata.nativetype', 'nifti1', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('nifti1.nii.gz',
                      opj('sub-01', 'func', 'sub-01_task-some_bold.nii.gz')),
                     ('nifti1.nii.gz',
                      opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'))):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', src),
             opj(path, dst))
    ds.add('.')
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('bids', 'nifti1'):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
bids.BIDSVersion
bids.author
bids.citation
bids.conformsto
bids.description
bids.fundedby
bids.license
bids.modality
bids.name
bids.participant.age(years)
bids.participant.gender
bids.participant.handedness
bids.participant.hearing_problems_current
bids.participant.id
bids.participant.language
bids.subject
bids.task
bids.type
datalad_core.id
datalad_core.refcommit
id
nifti1.cal_max
nifti1.cal_min
nifti1.datatype
nifti1.description
nifti1.dim
nifti1.freq_axis
nifti1.intent
nifti1.magic
nifti1.phase_axis
nifti1.pixdim
nifti1.qform_code
nifti1.sform_code
nifti1.sizeof_hdr
nifti1.slice_axis
nifti1.slice_duration
nifti1.slice_end
nifti1.slice_order
nifti1.slice_start
nifti1.spatial_resolution(mm)
nifti1.t_unit
nifti1.temporal_spacing(s)
nifti1.toffset
nifti1.vox_offset
nifti1.xyz_unit
parentds
path
type
"""

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched_key, matched_val in (
            # random keyword query
            # multi word query implies AND
        ('textblob', ['bold', 'male'],
         opj('sub-01', 'func',
             'sub-01_task-some_bold.nii.gz'), 'meta', 'male'),
            # report which field matched with auto-field
        ('autofield', 'female',
         opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'),
         'bids.participant.gender', 'female'),
            # autofield multi-word query is also AND
        ('autofield', ['bids.type:bold', 'bids.participant.id:01'],
         opj('sub-01', 'func',
             'sub-01_task-some_bold.nii.gz'), 'bids.type', 'bold'),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        if mode == 'textblob':
            # 'textblob' does datasets by default only (but could be configured otherwise)
            assert_result_count(res, 1)
        else:
            # the rest always has a file and the dataset, because they carry metadata in
            # the same structure
            assert_result_count(res, 2)
            assert_result_count(
                res,
                1,
                type='file',
                path=opj(ds.path, hitpath),
                # each file must report the ID of the dataset it is from, critical for
                # discovering related content
                dsid=ds.id)
        assert_result_count(res, 1, type='dataset', path=ds.path, dsid=ds.id)
        # test the key and specific value of the match
        assert_in(matched_key, res[-1]['query_matched'])
        assert_equal(res[-1]['query_matched'][matched_key], matched_val)
Example #32
def test_ephemeral(origin_path, bare_path, clone1_path, clone2_path,
                   clone3_path):

    file_test = Path('ds') / 'test.txt'
    file_testsub = Path('ds') / 'subdir' / 'testsub.txt'

    origin = Dataset(origin_path).create(force=True)
    if origin.repo.is_managed_branch():
        raise SkipTest('Ephemeral clones cannot use adjusted mode repos')

    origin.save()
    # 1. clone via path
    clone1 = clone(origin_path, clone1_path, reckless='ephemeral')

    can_symlink = has_symlink_capability()

    if can_symlink:
        clone1_annex = (clone1.repo.dot_git / 'annex')
        ok_(clone1_annex.is_symlink())
        ok_(clone1_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone1.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone1.pathobj / file_test).read_text(), 'some')
            eq_((clone1.pathobj / file_testsub).read_text(), 'somemore')

    # 2. clone via file-scheme URL
    clone2 = clone('file://' + Path(origin_path).as_posix(),
                   clone2_path,
                   reckless='ephemeral')

    if can_symlink:
        clone2_annex = (clone2.repo.dot_git / 'annex')
        ok_(clone2_annex.is_symlink())
        ok_(clone2_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone2.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone2.pathobj / file_test).read_text(), 'some')
            eq_((clone2.pathobj / file_testsub).read_text(), 'somemore')

    # 3. add something to clone1 and push back to origin; availability from
    # clone1 should not be propagated (we declared 'here' dead to that end)

    (clone1.pathobj / 'addition.txt').write_text("even more")
    clone1.save()
    origin.config.set("receive.denyCurrentBranch",
                      "updateInstead",
                      where="local")
    # Note that the only thing to test is git-annex-dead here,
    # if we couldn't symlink:
    clone1.publish(to='origin',
                   transfer_data='none' if can_symlink else 'auto')
    if not origin.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origin.repo.get_hexsha(), clone1.repo.get_hexsha())
    res = origin.repo.whereis("addition.txt")
    if can_symlink:
        # obv. present in origin, but this is not yet known to origin:
        eq_(res, [])
        res = origin.repo.fsck()
        assert_result_count(res, 3, success=True)
        # TODO: Double check whether annex reports POSIX paths on Windows!
        eq_({str(file_test), str(file_testsub), "addition.txt"},
            {r['file']
             for r in res})
        # now origin knows:
    res = origin.repo.whereis("addition.txt")
    eq_(res, [origin.config.get("annex.uuid")])

    # 4. ephemeral clone from a bare repo
    runner = GitWitlessRunner()
    runner.run(['git', 'clone', '--bare', origin_path, bare_path])
    runner.run(['git', 'annex', 'init'], cwd=bare_path)

    eph_from_bare = clone(bare_path, clone3_path, reckless='ephemeral')
    can_symlink = has_symlink_capability()

    if can_symlink:
        # Bare repo uses dirhashlower by default, while a standard repo uses
        # dirhashmixed. Symlinking different object trees doesn't really work.
        # Don't test that here, since this is not a matter of the "ephemeral"
        # option alone. We should have such a setup in the RIA tests and test
        # for data access there.
        # Here we only test for the correct linking.
        eph_annex = eph_from_bare.repo.dot_git / 'annex'
        ok_(eph_annex.is_symlink())
        ok_(eph_annex.resolve().samefile(Path(bare_path) / 'annex'))
Example #33
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")

    ds_src.save()

    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")

    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")))
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'),
                to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish,
                  dataset=source,
                  to='target',
                  result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source,
                  to='target',
                  result_xfm='datasets',
                  force=True)
    eq_(res, [source])
Example #35
def test_unlock(path):

    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # Note: In V6+ we can unlock even if the file's content isn't present, but
    # doing so when unlock() is called with no paths isn't consistent with the
    # current behavior when an explicit path is given (it doesn't unlock) or
    # with the behavior in V5, so we don't do it.

    # Unlocking the dataset without an explicit path does not fail if there
    # are files without content.
    eq_(ds.unlock(path=None, on_failure="ignore"), [])
    eq_(ds.unlock(path=[], on_failure="ignore"), [])
    # cannot unlock without content (annex get wasn't called)
    assert_in_results(ds.unlock(path="test-annex.dat", on_failure="ignore"),
                      path=opj(path, "test-annex.dat"),
                      status="impossible")

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat writeable in V6!
    # Why the hell is this different than the first time we wrote to the file
    # and locked it again?
    # Also: After opening the file is empty.

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())
Example #36
def test_container_files(path, super_path):
    raise SkipTest('SingularityHub is gone for now')
    ds = Dataset(path).create()
    cmd = ['dir'] if on_windows else ['ls']

    # plug in a proper singularity image
    ds.containers_add(
        'mycontainer',
        url=testimg_url,
        image='righthere',
        # the next one is auto-guessed
        #call_fmt='singularity exec {img} {cmd}'
    )
    assert_result_count(ds.containers_list(),
                        1,
                        path=op.join(ds.path, 'righthere'),
                        name='mycontainer')
    ok_clean_git(path)

    def assert_no_change(res, path):
        # this command changed nothing
        #
        # Avoid specifying the action because it will change from "add" to
        # "save" in DataLad v0.12.
        assert_result_count(res,
                            1,
                            status='notneeded',
                            path=path,
                            type='dataset')

    # now we can run stuff in the container
    # and because there is just one, we don't even have to name the container
    res = ds.containers_run(cmd)
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # same thing as we specify the container by its name:
    res = ds.containers_run(cmd, container_name='mycontainer')
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # we can also specify the container by its path:
    res = ds.containers_run(cmd, container_name=op.join(ds.path, 'righthere'))
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # Now, test the same thing, but with this dataset being a subdataset of
    # another one:

    super_ds = Dataset(super_path).create()
    super_ds.install("sub", source=path)

    # When running, we don't discover containers in subdatasets
    with assert_raises(ValueError) as cm:
        super_ds.containers_run(cmd)
    assert_in("No known containers", str(cm.exception))
    # ... unless we need to specify the name
    res = super_ds.containers_run(cmd, container_name="sub/mycontainer")
    # container becomes an 'input' for `run` -> get request (needed this time)
    assert_result_count(res,
                        1,
                        action='get',
                        status='ok',
                        path=op.join(super_ds.path, 'sub', 'righthere'),
                        type='file')
    assert_no_change(res, super_ds.path)
Example #37
def test_rerun_onto(path):
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(ds.rerun("HEAD",
                                     onto="",
                                     since=since,
                                     on_failure="ignore"),
                            1,
                            status="impossible",
                            action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.repo.git.rev_parse("HEAD"),
        ds.repo.repo.git.rev_parse("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.repo.git.rev_parse("HEAD"),
         ds.repo.repo.git.rev_parse("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.repo.git.rev_parse("HEAD"),
         ds.repo.repo.git.rev_parse("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.repo.git.rev_parse("static^"))
Exemple #38
0
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.repo.head.commit.message.splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    ds.rerun()
    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
Exemple #39
0
def test_aggregation(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    assert_status('ok', ds.save(recursive=True))
    # while we are at it: do it again, nothing should happen
    assert_status('notneeded', ds.save(recursive=True))

    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.meta_aggregate(recursive=True, into='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='meta_aggregate')
    # the respective super datasets see two saves, one to record the change
    # in the subdataset after its own aggregation, and one after the super
    # updated with aggregated metadata
    assert_result_count(res, 5, status='ok', action='save', type='dataset')
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.meta_dump(reporton='aggregates', recursive=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.meta_dump(recursive=True)
    # basic sanity check
    assert_result_count(origres, 3, type='dataset')
    assert_result_count([r for r in origres if r['path'].endswith('.json')],
                        3,
                        type='file')  # Now that we have annex.key
    # three different IDs
    eq_(
        3,
        len(
            set([
                _get_dsid_from_core_metadata(s['metadata']['metalad_core'])
                for s in origres if s['type'] == 'dataset'
            ])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(op.join(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    eq_(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.meta_dump()
    # basic sanity check
    assert_result_count(cloneres, 1, type='dataset')
    # payload file
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
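
# Aside (not part of the original test): the aggregation/query cycle shown
# above, reduced to a minimal sketch using the same calls.
def _sketch_metadata_workflow(superds):
    # aggregate metadata from all subdatasets into every dataset of the
    # hierarchy, including intermediate superdatasets
    superds.meta_aggregate(recursive=True, into='all')
    # one aggregate record per known (sub)dataset
    for agg in superds.meta_dump(reporton='aggregates', recursive=True):
        print(agg['path'])
    # and the actual metadata records themselves
    return superds.meta_dump(recursive=True)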
Exemple #40
0
def test_unlock(path):

    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in direct mode there is no unlock:
    if ds.repo.is_direct_mode():
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('notneeded', res)

    # in V6 we can unlock even if the file's content isn't present:
    elif ds.repo.config.getint("annex", "version") == 6:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we continue to have a situation consistent with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    if ds.repo.is_direct_mode():
        assert_status('notneeded', result)
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6 we need to explicitly re-lock it:
    if ds.repo.config.getint("annex", "version") == 6:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we continue to have a situation consistent with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    if ds.repo.is_direct_mode():
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='notneeded')
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6 we need to explicitly re-lock it:
    if ds.repo.config.getint("annex", "version") == 6:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that we continue to have a situation consistent with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat writeable in V6!
    # Why is this different from the first time we wrote to the file
    # and locked it again?
    # Also: after opening, the file is empty.

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())
Exemple #41
0
def test_save(path):

    ds = Dataset(path)

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)

    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all was saved already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(
        res, 1,
        action='save', type='dataset', path=ds.path,
        message=('cannot tag this version: %s',
                 "fatal: tag 'new_sub' already exists"))
Exemple #42
0
def test_get_modified_subpaths(path):
    ds = Dataset(path).create(force=True)
    suba = ds.create('ba', force=True)
    subb = ds.create('bb', force=True)
    subsub = ds.create(opj('bb', 'bba', 'bbaa'), force=True)
    ds.add('.', recursive=True)
    ok_clean_git(path)

    orig_base_commit = ds.repo.repo.commit().hexsha

    # nothing was modified compared to the status quo, output must be empty
    eq_([],
        list(get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit)))

    # modify one subdataset
    create_tree(subsub.path, {'added': 'test'})
    subsub.add('added')

    # it will replace the requested path with the path of the closest
    # submodule that is modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        1,
        type='dataset', path=subb.path)

    # make another one dirty
    create_tree(suba.path, {'added': 'test'})

    # now a single query path will result in the two modified subdatasets
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        2,
        type='dataset')

    # now save up top; this will save the new state of subb, but keep suba dirty
    ds.save(subb.path, recursive=True)
    # now if we ask for what was last saved, we only get the new state of subb
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD~1..HEAD'),
        1,
        type='dataset', path=subb.path)
    # comparing the working tree to HEAD will show the dirty suba instead
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        1,
        type='dataset', path=suba.path)

    # add/save everything, become clean
    ds.add('.', recursive=True)
    ok_clean_git(path)
    # nothing is reported as modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        0)
    # but looking all the way back, we find all changes
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            orig_base_commit),
        2,
        type='dataset')

    # now we ask specifically for the file we added to subsub above
    query = [dict(path=opj(subsub.path, 'added'))]
    res = list(get_modified_subpaths(query, ds, orig_base_commit))
    # we only get this one result back, and not all the submodule state changes
    # that were also saved in the superdatasets
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, type='file', path=opj(subsub.path, 'added'), state='added')
    # but if we only look at the last saved change (suba), our query will
    # not return anything
    res = get_modified_subpaths(query, ds, 'HEAD^')
    assert_result_count(res, 0)

    # deal with removal (forced, bypassing the insufficient-copies check)
    ds.remove(suba.path, check=False)
    ok_clean_git(path)
    res = list(get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD~1..HEAD'))
    # removed submodule + .gitmodules update
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        type_src='dataset', path=suba.path)
Exemple #43
0
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test that recursive update doesn't fail if a submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(dest.remove('subm 1'),
                        1,
                        status='ok',
                        action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2,
                            status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        status='ok',
                        type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.add(opj('subm 2', 'load.dat'),
               message="saving changes within subm2",
               recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        status='ok',
                        type='dataset')
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Exemple #44
0
def test_siblings(origin, repo_path, local_clone_path):

    sshurl = "ssh://push-remote.example.com"
    httpurl1 = "http://remote1.example.com/location"
    httpurl2 = "http://remote2.example.com/location"

    # insufficient arguments
    # we need a dataset to work at
    with chpwd(repo_path):  # not yet there
        assert_raises(InsufficientArgumentsError,
                      siblings, 'add', url=httpurl1)

    # prepare src
    source = install(repo_path, source=origin, recursive=True)
    # pollute config
    depvar = 'remote.test-remote.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')

    # cannot configure unknown remotes as dependencies
    res = siblings(
        'configure',
        dataset=source,
        name="test-remote",
        url=httpurl1,
        publish_depends=['r1', 'r2'],
        on_failure='ignore',
        result_renderer=None)
    assert_status('error', res)
    eq_(res[0]['message'],
        ('unknown sibling(s) specified as publication dependency: %s',
         set(('r1', 'r2'))))
    # prior config was not changed by failed call above
    eq_(source.config.get(depvar, None), 'stupid')

    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1,
                   result_xfm='paths',
                   result_renderer=None)

    eq_(res, [source.path])
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1,
        source.repo.get_remote_url("test-remote"))

    # reconfiguring doesn't change anything
    siblings('configure', dataset=source, name="test-remote",
             url=httpurl1,
             result_renderer=None)
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1,
        source.repo.get_remote_url("test-remote"))
    # re-adding doesn't work
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('error', res)
    # only after removal
    res = siblings('remove', dataset=source, name="test-remote",
                   result_renderer=None)
    assert_status('ok', res)
    assert_not_in("test-remote", source.repo.get_remotes())
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('ok', res)

    # add to another remote automagically taking it from the url
    # and being in the dataset directory
    with chpwd(source.path):
        res = siblings('add', url=httpurl2,
                       result_renderer=None)
    assert_result_count(
        res, 1,
        name="remote2.example.com", type='sibling')
    assert_in("remote2.example.com", source.repo.get_remotes())

    # don't fail with conflicting url, when using force:
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere",
        source.repo.get_remote_url("test-remote"))

    # no longer a use case, I would need additional convincing that
    # this is in any way useful other than triple-checking other people's
    # errors. For an actual check use 'query'
    # maybe it could be turned into a set of warnings when `configure`
    # alters an existing setting, but then why call configure, if you
    # want to keep the old values
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere")
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))
    ## add a push url without force fails, since in a way the fetch url is the
    ## configured push url, too, in that case:
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere",
    #                pushurl=sshurl, force=False)
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))

    # add push url (force):
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   pushurl=sshurl,
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere",
        source.repo.get_remote_url("test-remote"))
    eq_(sshurl,
        source.repo.get_remote_url("test-remote", push=True))

    # recursively:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote",
            url=httpurl1 + "/%NAME",
            pushurl=sshurl + "/%NAME",
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote", repo.get_remotes())
        url = repo.get_remote_url("test-remote")
        pushurl = repo.get_remote_url("test-remote", push=True)
        ok_(url.startswith(httpurl1 + '/' + basename(source.path)))
        ok_(url.endswith(basename(repo.path)))
        ok_(pushurl.startswith(sshurl + '/' + basename(source.path)))
        ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote-2",
            url=httpurl1,
            pushurl=sshurl,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-2", repo.get_remotes())
        url = repo.get_remote_url("test-remote-2")
        pushurl = repo.get_remote_url("test-remote-2", push=True)
        ok_(url.startswith(httpurl1))
        ok_(pushurl.startswith(sshurl))
        # FIXME: next condition used to compare the *Repo objects instead of
        # their paths. Due to missing annex-init in
        # datalad/tests/utils.py:clone_url this might not be the same, since
        # `source` actually is an annex, but after flavor 'clone' in
        # `with_testrepos` and then `install` any trace of an annex might be
        # gone in v5 (branch 'master' only), while in direct mode it still is
        # considered an annex. `repo` is forced to be a `GitRepo`, so we might
        # compare two objects of different classes while they actually are
        # pointing to the same repository.
        # See github issue #1854
        if repo.path != source.repo.path:
            ok_(url.endswith('/' + basename(repo.path)))
            ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template and pushurl but full "hierarchy"
    # to a local clone
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote-3",
            url=local_clone_path,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-3", repo.get_remotes())
        url = repo.get_remote_url("test-remote-3")
        pushurl = repo.get_remote_url("test-remote-3", push=True)

        eq_(normpath(url),
            normpath(opj(local_clone_path,
                         relpath(str(r['path']), source.path))))
        # https://github.com/datalad/datalad/issues/3951
        ok_(not pushurl)  # no pushurl should be defined
Exemple #45
0
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res,
                            1,
                            action='add',
                            path=opj(ds.path, 'empty'),
                            type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.repo.head.commit.message
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        assert_status('notneeded', res)
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_status('notneeded', res)

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_status('ok', res)
        assert_result_count(res,
                            1,
                            action='add',
                            path=opj(ds.path, 'empty2'),
                            type='file')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)

    # Simple sidecar message checks.
    ds.run(["touch", "dummy0"], message="sidecar arg", sidecar=True)
    assert_not_in('"cmd":', ds.repo.repo.head.commit.message)

    real_get = ds.config.get

    def mocked_get(key, default=None):
        if key == "datalad.run.record-sidecar":
            return True
        return real_get(key, default)

    with patch.object(ds.config, "get", mocked_get):
        ds.run(["touch", "dummy1"], message="sidecar config")
    assert_not_in('"cmd":', ds.repo.repo.head.commit.message)
Exemple #46
0
def test_aggregate_removal(path):
    base = Dataset(op.join(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(op.join(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = sub.create(op.join('subsub'), force=True)
    base.save(recursive=True)
    base.meta_aggregate(recursive=True, into='all')
    assert_repo_status(base.path)
    res = base.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(op.join('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.meta_aggregate(recursive=True, into='all')
    assert_repo_status(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
Exemple #47
0
def _test_bare_git_version_1(host, dspath, store):
    # This test should take a dataset and create a bare repository at the remote
    # end from it.
    # Given that it is placed correctly within a tree of datasets, that remote
    # should then be usable as an ora-remote as well as a git-type remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 1 (lower) upload and consumption should be
    #       interchangeable. It doesn't matter which remote is used for what
    #       direction.
    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))
                    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host,
                                              path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 1 (dirhash lower):
    create_ds_in_store(io, store, ds.id, '1', '1')

    # Now, let's have the bare repo as a git remote and use it with annex
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')

    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Now, add the ora remote:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='ora-remote', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from git-remote to local and see it not being available
    # via bare-git anymore.
    ds.repo.call_git(['annex', 'move', '--all', '--from=bare-git'])
    # ora-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # But after fsck it does:
    fsck_res = [annexjson2result(r, ds)
                for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, subdir/two\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)
    # and the other way around: upload via ora-remote and have it available via
    # git-remote:
    ds.repo.copy_to('.', 'ora-remote')
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)
Exemple #48
0
def _test_bare_git_version_2(host, dspath, store):
    # Similarly to test_bare_git_version_1, this should ensure a bare git repo
    # at the store location for a dataset doesn't conflict with the ORA remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 2 (mixed) upload via ORA and consumption via git should
    #       work. But not the other way around, since git-annex uses
    #       dirhashlower with bare repos.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))
                    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host,
                                              path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 2 (dirhash mixed):
    create_ds_in_store(io, store, ds.id, '2', '1')

    # Now, let's have the bare repo as a git remote
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # and the ORA remote in addition:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # upload keys via ORA:
    ds.repo.copy_to('.', 'ora-remote')
    # bare-git doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # fsck to make availability known
    assert_status(
        'ok',
        [annexjson2result(r, ds)
         for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # actually consumable via git remote:
    ds.repo.call_git(['annex', 'move', 'one.txt', '--from', 'bare-git'])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now, move back via git - shouldn't be consumable via ORA
    ds.repo.call_git(['annex', 'move', 'one.txt', '--to', 'bare-git'])
    # fsck to make availability known, but there's nothing from POV of ORA:
    fsck_res = [annexjson2result(r, ds)
                for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                                '** was expected to be present, '
                                'but its content is missing.')
    assert_result_count(fsck_res, 1, status='ok')
    eq_(len(fsck_res), 2)
    eq_(len(ds.repo.whereis('one.txt')), 1)
Exemple #49
0
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use the `remove` API to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(
            without_path, 1, type='dataset', path=dspath)
        assert_result_count(
            pwd_res, 1, type='dataset', path=dspath, orig_request='.',
            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0]
         if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(
        base_res, 1, type='dataset', parentds=parentds.path,
        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1]
         if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(
        path=opj(dspath, 'a'), recursive=True, force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, type='dataset', status='', parentds=dspath,
        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'), status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for same file again, use 'notneeded' for unavailable to try trigger
    # any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath, recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k] for k in before_res[0] if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])
    # feed annotated paths into annotate_paths, it shouldn't change things
    # upon second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
Exemple #50
0
def test_status(_path, linkpath):
    # do the setup on the real path, not the symlink, to have its
    # bugs not affect this test of status()
    ds = get_deeply_nested_structure(str(_path))
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath
    else:
        path = _path

    ds = Dataset(path)
    if has_symlink_capability():
        assert ds.pathobj != ds.repo.pathobj

    # spotcheck that annex status reporting and availability evaluation
    # works
    assert_result_count(
        ds.status(annex='all', result_renderer=None),
        1,
        path=str(ds.pathobj / 'subdir' / 'annexed_file.txt'),
        key='MD5E-s5--275876e34cf609db118f3d84b799a790.txt',
        has_content=True,
        objloc=str(ds.repo.pathobj / '.git' / 'annex' / 'objects' /
                   # hashdir is different on windows
                   ('f33' if ds.repo.is_managed_branch() else '7p') /
                   ('94b' if ds.repo.is_managed_branch() else 'gp') /
                   'MD5E-s5--275876e34cf609db118f3d84b799a790.txt' /
                   'MD5E-s5--275876e34cf609db118f3d84b799a790.txt'))

    plain_recursive = ds.status(recursive=True, result_renderer=None)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for res in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        if 'link2' in str(res['path']):
            assert res['type'] == 'symlink', res
        else:
            assert res['type'] != 'symlink', res
        # every item must report its parent dataset
        assert_in('parentds', res)

    # bunch of smoke tests
    # query of '.' is same as no path
    eq_(plain_recursive,
        ds.status(path='.', recursive=True, result_renderer=None))
    # duplicate paths do not change things
    eq_(plain_recursive,
        ds.status(path=['.', '.'], recursive=True, result_renderer=None))
    # neither do nested paths
    eq_(
        plain_recursive,
        ds.status(path=['.', 'subds_modified'],
                  recursive=True,
                  result_renderer=None))
    # when invoked in a subdir of a dataset it still reports on the full thing
    # just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = status(recursive=True, result_renderer=None)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive,
        ds.status(path=ds.path, recursive=True, result_renderer=None))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join('subds_modified', 'subds_lvl1_modified',
                    OBSCURE_FILENAME + u'_directory_untracked')
    apathobj = ds.pathobj / rpath
    apath = str(apathobj)
    # ds.repo.pathobj will have the symlink resolved
    arealpath = ds.repo.pathobj / rpath
    # TODO include explicit relative path in test
    for p in (rpath, apath, arealpath, None):
        if p is None:
            # change into the realpath of the dataset and
            # query with an explicit path
            with chpwd(ds.repo.path):
                res = ds.status(path=op.join('.', rpath), result_renderer=None)
        else:
            res = ds.status(path=p, result_renderer=None)
        assert_result_count(
            res,
            1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    assert_result_count(ds.status(recursive=True, result_renderer=None),
                        1,
                        path=apath)
    # limiting recursion will exclude this particular path
    assert_result_count(ds.status(recursive=True,
                                  recursion_limit=1,
                                  result_renderer=None),
                        0,
                        path=apath)
    # a negative limit means unlimited recursion
    eq_(ds.status(recursive=True, recursion_limit=-1, result_renderer=None),
        ds.status(recursive=True, result_renderer=None))
Exemple #51
0
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to avoid altering the testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source,
                  to="target",
                  recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(res,
                        2,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))
    assert_git_annex_branch_published(source.repo, target)
    eq_(list(sub1_target.get_branch_commits_("master")),
        list(sub1.get_branch_commits_("master")))
    assert_git_annex_branch_published(sub1, sub1_target)
    eq_(list(sub2_target.get_branch_commits_("master")),
        list(sub2.get_branch_commits_("master")))
    assert_git_annex_branch_published(sub2, sub2_target)

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits_("git-annex")),
         list(source.repo.get_branch_commits_("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    assert_git_annex_branch_published(source.repo, origin.repo)

    # test publishing with --since. By default, since there are no changes, nothing gets pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).save('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).save('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(dataset=source,
                   to='target',
                   recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save the present changes and publish while implying "since the last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within "subm 1",
    # since if `--since=''` worked correctly, nothing changed there and it
    # should not have been touched at all
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')

    # Don't fail when a string is passed as `dataset` and since="".
    assert_status("notneeded", publish(since='', dataset=source.path))
Exemple #52
0
def test_save_amend(dspath):

    dspath = Path(dspath)
    file_in_super = dspath / 'somefile'
    file_in_sub = dspath / 'subds' / 'file_in_sub'

    # test on a hierarchy including a plain git repo:
    ds = Dataset(dspath).create(force=True, no_annex=True)
    subds = ds.create('subds', force=True)
    ds.save(recursive=True)
    assert_repo_status(ds.repo)

    # recursive and amend are mutually exclusive:
    for d in (ds, subds):
        assert_raises(ValueError, d.save, recursive=True, amend=True)

    # in an annex repo the branch we are interested in might not be the active
    # branch (adjusted):
    sub_branch = subds.repo.get_corresponding_branch()

    # amend in subdataset w/ new message; otherwise empty amendment:
    last_sha = subds.repo.get_hexsha(sub_branch)
    subds.save(message="new message in sub", amend=True)
    # we did in fact commit something:
    neq_(last_sha, subds.repo.get_hexsha(sub_branch))
    # repo is clean:
    assert_repo_status(subds.repo)
    # message is correct:
    eq_(subds.repo.format_commit("%B", sub_branch).strip(),
        "new message in sub")
    # actually replaced the previous commit:
    assert_not_in(last_sha, subds.repo.get_branch_commits_(sub_branch))

    # amend modifications in subdataset w/o new message
    if not subds.repo.is_managed_branch():
        subds.unlock('file_in_sub')
    file_in_sub.write_text("modified again")
    last_sha = subds.repo.get_hexsha(sub_branch)
    subds.save(amend=True)
    neq_(last_sha, subds.repo.get_hexsha(sub_branch))
    assert_repo_status(subds.repo)
    # message unchanged:
    eq_(subds.repo.format_commit("%B", sub_branch).strip(),
        "new message in sub")
    # actually replaced the previous commit:
    assert_not_in(last_sha, subds.repo.get_branch_commits_(sub_branch))

    # save --amend with nothing to amend with:
    res = subds.save(amend=True)
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save')

    # amend in superdataset w/ new message; otherwise empty amendment:
    last_sha = ds.repo.get_hexsha()
    ds.save(message="new message in super", amend=True)
    neq_(last_sha, ds.repo.get_hexsha())
    assert_repo_status(subds.repo)
    eq_(ds.repo.format_commit("%B").strip(), "new message in super")
    assert_not_in(last_sha, ds.repo.get_branch_commits_())

    # amend modifications in superdataset w/o new message
    file_in_super.write_text("changed content")
    if not subds.repo.is_managed_branch():
        subds.unlock('file_in_sub')
    file_in_sub.write_text("modified once again")
    last_sha = ds.repo.get_hexsha()
    last_sha_sub = subds.repo.get_hexsha(sub_branch)
    ds.save(amend=True)
    neq_(last_sha, ds.repo.get_hexsha())
    eq_(ds.repo.format_commit("%B").strip(), "new message in super")
    assert_not_in(last_sha, ds.repo.get_branch_commits_())
    # we didn't mess with the subds:
    assert_repo_status(ds.repo, modified=["subds"])
    eq_(last_sha_sub, subds.repo.get_hexsha(sub_branch))
    eq_(subds.repo.format_commit("%B", sub_branch).strip(),
        "new message in sub")

    # save --amend with nothing to amend with:
    last_sha = ds.repo.get_hexsha()
    res = ds.save(amend=True)
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save')
    eq_(last_sha, ds.repo.get_hexsha())
    # we didn't mess with the subds:
    assert_repo_status(ds.repo, modified=["subds"])
    eq_(last_sha_sub, subds.repo.get_hexsha(sub_branch))
    eq_(subds.repo.format_commit("%B", sub_branch).strip(),
        "new message in sub")

    # amend with different identity:
    orig_author = ds.repo.format_commit("%an")
    orig_email = ds.repo.format_commit("%ae")
    orig_date = ds.repo.format_commit("%ad")
    orig_committer = ds.repo.format_commit("%cn")
    orig_committer_mail = ds.repo.format_commit("%ce")
    eq_(orig_author, orig_committer)
    eq_(orig_email, orig_committer_mail)
    with patch.dict('os.environ',
                    {'GIT_COMMITTER_NAME': 'Hopefully Different',
                     'GIT_COMMITTER_EMAIL': '*****@*****.**'}):

        ds.config.reload(force=True)
        ds.save(amend=True, message="amend with hope")
    # author was kept:
    eq_(orig_author, ds.repo.format_commit("%an"))
    eq_(orig_email, ds.repo.format_commit("%ae"))
    eq_(orig_date, ds.repo.format_commit("%ad"))
    # committer changed:
    eq_(ds.repo.format_commit("%cn"), "Hopefully Different")
    eq_(ds.repo.format_commit("%ce"), "*****@*****.**")

    # corner case: amend empty commit with no parent:
    rmtree(str(dspath))
    # When an adjusted branch is enforced by git-annex detecting a crippled FS,
    # git-annex produces an empty commit before switching to the adjusted branch:
    # "commit before entering adjusted branch"
    # The commit by `create` would then already be the second one.
    # Therefore go with a plain annex repo and create an (empty) commit only when
    # not on an adjusted branch:
    repo = AnnexRepo(dspath, create=True)
    if not repo.is_managed_branch():
        repo.commit(msg="initial", options=['--allow-empty'])
    ds = Dataset(dspath)
    branch = ds.repo.get_corresponding_branch() or ds.repo.get_active_branch()
    # test pointless if we start with more than one commit
    eq_(len(list(ds.repo.get_branch_commits_(branch))),
        1,
        msg="More than on commit '{}': {}".format(
            branch, ds.repo.call_git(['log', branch]))
        )
    last_sha = ds.repo.get_hexsha(branch)

    ds.save(message="new initial commit", amend=True)
    assert_repo_status(ds.repo)
    eq_(len(list(ds.repo.get_branch_commits_(branch))),
        1,
        msg="More than on commit '{}': {}".format(
            branch, ds.repo.call_git(['log', branch]))
        )
    assert_not_in(last_sha, ds.repo.get_branch_commits_(branch))
    eq_(ds.repo.format_commit("%B", branch).strip(), "new initial commit")
Exemple #53
0
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127,
        # just that the command failed with a non-zero one
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('cd .> empty', message='TEST')
        assert_repo_status(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res,
                            1,
                            action='add',
                            path=op.join(ds.path, 'empty'),
                            type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        # ATTN: Use master explicitly so that this check works when we're on an
        # adjusted branch too (e.g., when this test is executed under Windows).
        commit_msg = ds.repo.format_commit("%B", "master")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        noop_cmd = ':'
        res = ds.run(noop_cmd, message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run([noop_cmd], message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run('cd . > empty2', message='TEST')
        assert_result_count(res,
                            1,
                            action='add',
                            path=op.join(ds.path, 'empty2'),
                            type='file',
                            status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
Exemple #54
0
def test_publish_depends(origin, src_path, target1_path, target2_path,
                         target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling('ssh://localhost' + target1_path,
                          annex_wanted='standard',
                          annex_group='backup',
                          name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res,
        1,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling('ssh://datalad-test' + target3_path, name='target3')
    assert_repo_status(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.save('probe1')
    assert_repo_status(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
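
The dependency mechanism tested above is driven by a single configuration variable. A minimal sketch, not part of the example: it assumes the datalad Python API and uses throwaway temporary paths and made-up sibling names.

import tempfile

from datalad.api import Dataset

src = Dataset(tempfile.mkdtemp()).create()
# two bare clones registered as remotes 'target1' and 'target2'
for name in ('target1', 'target2'):
    path = tempfile.mkdtemp()
    src.repo.call_git(['clone', '--bare', src.path, path])
    src.repo.add_remote(name, path)

# declare that publishing to target2 requires publishing to target1 first
src.config.set('remote.target2.datalad-publish-depends', 'target1', where='local')

# a single publish to target2 now also triggers one to target1
# (both are already up to date here, so the result is merely 'notneeded')
src.publish(to='target2')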
Exemple #55
0
def test_recurseinto(dspath, dest):
    # make fresh dataset hierarchy
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    # label intermediate dataset as 'norecurseinto'
    res = Dataset(opj(ds.path, 'b')).subdatasets(
        contains='bb',
        set_property=[('datalad-recursiveinstall', 'skip')])
    assert_result_count(res, 1, path=opj(ds.path, 'b', 'bb'))
    ds.add('b/', recursive=True)
    ok_clean_git(ds.path)

    # recursive install, should skip the entire bb branch
    res = install(source=ds.path, path=dest, recursive=True,
                  result_xfm=None, result_filter=None)
    assert_result_count(res, 5)
    assert_result_count(res, 5, type='dataset')
    # we got the neighbor subdataset
    assert_result_count(res, 1, type='dataset',
                        path=opj(dest, 'b', 'ba'))
    # we did not get the one we wanted to skip
    assert_result_count(res, 0, type='dataset',
                        path=opj(dest, 'b', 'bb'))
    assert_not_in(
        opj(dest, 'b', 'bb'),
        Dataset(dest).subdatasets(fulfilled=True, result_xfm='paths'))
    assert(not Dataset(opj(dest, 'b', 'bb')).is_installed())

    # cleanup
    Dataset(dest).remove(recursive=True)
    assert(not lexists(dest))
    # again but just clone the base, and then get content and grab 'bb'
    # explicitly -- must get it installed
    dest = install(source=ds.path, path=dest)
    res = dest.get(['.', opj('b', 'bb')], get_data=False, recursive=True)
    assert_result_count(res, 8)
    assert_result_count(res, 8, type='dataset')
    assert_result_count(res, 1, type='dataset',
                        path=opj(dest.path, 'b', 'bb'))
    assert(Dataset(opj(dest.path, 'b', 'bb')).is_installed())
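
The 'datalad-recursiveinstall' property set above is an ordinary .gitmodules entry. A minimal sketch of setting it and observing its effect on a recursive install; not part of the example, assumes the datalad Python API and throwaway temporary paths.

import os.path as op
import tempfile

from datalad.api import Dataset, install

superds = Dataset(tempfile.mkdtemp()).create()
sub = superds.create('sub')
subsub = sub.create('subsub')
superds.save(recursive=True)

# mark 'subsub' so that recursive installation skips it
sub.subdatasets(contains='subsub',
                set_property=[('datalad-recursiveinstall', 'skip')])
superds.save(recursive=True)

# a recursive clone of the hierarchy should not fetch 'subsub'
clone = install(source=superds.path,
                path=op.join(tempfile.mkdtemp(), 'clone'),
                recursive=True)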
Exemple #56
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create an annex repo at the target, parked on a temporary branch:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source,
                  to="target",
                  path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))
    assert_git_annex_branch_published(source.repo, target)

    # we need to compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path,
                        source=dst_path,
                        result_xfm='datasets',
                        return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets get published; no additional file content is
    # pushed, since all content in the super was pushed before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
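
A minimal sketch of the data-publication part of the example above; it is not part of it, assumes the datalad Python API, and mirrors the setup used there with a throwaway annex repo, parked on a temporary branch, as the target.

import tempfile
from pathlib import Path

from datalad.api import Dataset
from datalad.support.annexrepo import AnnexRepo

src = Dataset(tempfile.mkdtemp()).create()
(Path(src.path) / 'big.dat').write_text('payload')
src.save(message='add annexed payload')

tgt_path = tempfile.mkdtemp()
AnnexRepo(tgt_path, create=True).checkout('TMP', ['-b'])
src.repo.add_remote('target', tgt_path)

# publishing the dataset alone pushes branches, but no annexed content ...
src.publish(to='target')
# ... naming the file also copies its content to the target's annex
src.publish('big.dat', to='target')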
Exemple #57
0
def check_push(annex, src_path, dst_path):
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/' + DEFAULT_BRANCH, orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res,
        status='impossible',
        message='No push target given, and none could be auto-detected, '
        'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())
    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      target='target',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # configure a default merge/upstream target
    src.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                   'target',
                   where='local')
    src.config.set('branch.{}.merge'.format(DEFAULT_BRANCH),
                   DEFAULT_BRANCH,
                   where='local')

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    # we could say since='HEAD~2' to make things fast, or we are lazy
    # and say since='^' to indicate the state of the tracking remote
    # which is the same, because we made two commits since the last push.
    res = src.push(to='target', since="^", jobs=2)
    assert_in_results(
        res,
        action='publish',
        status='ok',
        target='target',
        refspec=DEFAULT_REFSPEC,
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(res,
                          action='copy',
                          status='ok',
                          path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))
    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above)

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_in_results(res,
                          action='publish',
                          status='error',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(res,
                          action='publish',
                          status='ok',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['forced-update'])
        eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
            list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/<default branch>'
    eq_(orig_branches, src_repo.get_branches())
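
A minimal sketch of the incremental-push pattern from the example above; not part of it, assumes the datalad Python API, throwaway temporary paths, and a made-up sibling name 'target'.

import tempfile
from pathlib import Path

from datalad.api import Dataset
from datalad.support.annexrepo import AnnexRepo

src = Dataset(tempfile.mkdtemp()).create()
tgt_path = tempfile.mkdtemp()
AnnexRepo(tgt_path, create=True).checkout('TMP', ['-b'])
src.repo.add_remote('target', tgt_path)

# initial push establishes the branches on the target
src.push(to='target')

(Path(src.path) / 'more.txt').write_text('more')
src.save(to_git=True, message='more')
# '^' pushes everything since the last state known to be on the target
src.push(to='target', since='^')
# a rejected non-fast-forward push could be forced on the git side only:
# src.push(to='target', force='gitpush')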
Exemple #58
0
def test_uninstall_without_super(path):
    # a parent dataset with a proper subdataset, and another dataset that
    # is just placed underneath the parent, but not an actual subdataset
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    nosub = create(opj(parent.path, 'nosub'))
    ok_clean_git(nosub.path)
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1, path=sub.path)
    assert_result_count(subreport, 0, path=nosub.path)
    # it should be possible to uninstall the proper subdataset, even without
    # explicitly calling the uninstall methods of the parent -- things should
    # be figured out by datalad
    uninstall(sub.path)
    assert not sub.is_installed()
    # no present subdatasets anymore
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1)
    assert_result_count(subreport, 1, path=sub.path, state='absent')
    assert_result_count(subreport, 0, path=nosub.path)
    # but we should fail on an attempt to uninstall the non-subdataset
    res = uninstall(nosub.path, on_failure='ignore')
    assert_result_count(
        res, 1, path=nosub.path, status='error',
        message="will not uninstall top-level dataset (consider `remove` command)")
Exemple #59
0
def test_partial_aggregation(path):
    ds = Dataset(path).create(force=True)
    sub1 = ds.create('sub1', force=True)
    sub2 = ds.create('sub2', force=True)
    ds.save(recursive=True)

    # if we aggregate specific path(s) and say to recurse, we must not recurse
    # into the dataset itself and aggregate the others
    ds.meta_aggregate(path='sub1', recursive=True)
    res = ds.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 1, path=ds.path)
    assert_result_count(res, 1, path=sub1.path)
    # so no metadata aggregates for sub2 yet
    assert_result_count(res, 0, path=sub2.path)

    ds.meta_aggregate(recursive=True)
    origsha = ds.repo.get_hexsha()
    assert_repo_status(ds.path)
    # baseline, recursive aggregation gets us something for all three datasets
    res = ds.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 3)
    # now let's do partial aggregation from just one subdataset
    # we should not lose information on the other datasets
    # as this would be a problem any time anything in a dataset
    # subtree is missing: not installed, too expensive to reaggregate, ...
    ds.meta_aggregate(path='sub1')
    eq_(origsha, ds.repo.get_hexsha())
    res = ds.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=sub2.path)
    # nothing changes, so no commit
    ds.meta_aggregate(path='sub1')
    eq_(origsha, ds.repo.get_hexsha())
    # and the same thing again, doesn't ruin the state either
    ds.meta_aggregate(path='sub1')
    eq_(origsha, ds.repo.get_hexsha())
    # from-scratch aggregation kills datasets that were not listed;
    # note the trailing separator that indicates that the path refers
    # to the content of the subdataset, not the subdataset record
    # in the superdataset
    ds.meta_aggregate(path='sub1' + op.sep, force='fromscratch')
    res = ds.meta_dump(reporton='aggregates', recursive=True)
    assert_result_count(res, 1)
    assert_result_count(res, 1, path=sub1.path)
    # now reaggregate in full
    ds.meta_aggregate(recursive=True)
    # make change in sub1
    sub1.unlock('here')
    with open(op.join(sub1.path, 'here'), 'w') as f:
        f.write('fresh')
    ds.save(recursive=True)
    assert_repo_status(path)
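
A minimal sketch of the partial-aggregation call pattern above; it is not part of the example, requires the datalad-metalad extension that provides `meta_aggregate`/`meta_dump`, and uses throwaway temporary paths.

import tempfile

from datalad.api import Dataset

ds = Dataset(tempfile.mkdtemp()).create()
sub1 = ds.create('sub1')
sub2 = ds.create('sub2')
ds.save(recursive=True)

# aggregate metadata from a single subdataset only ...
ds.meta_aggregate(path='sub1', recursive=True)
# ... aggregates of other (sub)datasets are left untouched
res = ds.meta_dump(reporton='aggregates', recursive=True)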
Exemple #60
0
def test_bare_git(origin, remote_base_path):

    remote_base_path = Path(remote_base_path)

    # This test should take a dataset and create a bare repository at the remote end from it.
    # Given that it is placed correctly within a tree of datasets, that remote should then be
    # usable as a ria-remote as well as a git-type remote

    ds = create(origin)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # Use git to make sure the remote end is what git thinks a bare clone of it should look like
    bare_repo_path = remote_base_path / ds.id[:3] / ds.id[3:]
    subprocess.run(['git', 'clone', '--bare', origin, str(bare_repo_path)])

    # Now, let's have the bare repo as a git remote and use it with annex
    eq_(
        subprocess.run(
            ['git', 'remote', 'add', 'bare-git',
             str(bare_repo_path)],
            cwd=origin).returncode, 0)
    eq_(
        subprocess.run(['git', 'annex', 'enableremote', 'bare-git'],
                       cwd=origin).returncode, 0)
    eq_(
        subprocess.run(['git', 'annex', 'testremote', 'bare-git'],
                       cwd=origin).returncode, 0)
    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Since we created the remote this particular way instead of letting ria-remote create it, we need to put
    # ria-layout-version files into it. Then we should be able to also add it as a ria-remote.
    with open(str(remote_base_path / 'ria-layout-version'), 'w') as f:
        f.write('1')
    with open(str(bare_repo_path / 'ria-layout-version'), 'w') as f:
        f.write('1')

    # Now, add the ria remote:
    initexternalremote(ds.repo,
                       'riaremote',
                       'ria',
                       config={'base-path': str(remote_base_path)})
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='riaremote', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from the bare-git remote to local and see it no longer being available via bare-git
    eq_(
        subprocess.run(['git', 'annex', 'move', '--all', '--from=bare-git'],
                       cwd=origin).returncode, 0)
    # ria-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # But after fsck it does:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='riaremote', fast=True)
    ]
    assert_result_count(
        fsck_res,
        1,
        status='error',
        message=
        '** Based on the location log, one.txt\n** was expected to be present, '
        'but its content is missing.')
    assert_result_count(
        fsck_res,
        1,
        status='error',
        message=
        '** Based on the location log, subdir/two\n** was expected to be present, '
        'but its content is missing.')

    eq_(len(ds.repo.whereis('one.txt')), 1)