Example #1
def test_rerun_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    (repo.pathobj / "f1").write_text("f1\n")
    ds.save()

    def double_in_both_cmd(src, dest1, dest2):
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "content = open(sys.argv[-3]).read() * 2; "
            "d1 = sys.argv[-2]; d2 = sys.argv[-1]; "
            "op.lexists(d1) and os.unlink(d1); "
            "op.lexists(d2) and os.unlink(d2); "
            "open(d1, 'w').write(content); open(d2, 'w').write(content)", src,
            dest1, dest2
        ]

    ds.run(double_in_both_cmd("f1", "out1", "out2"), outputs=["out1"])
    # Drop the content so that we remove instead of unlock, making the test
    # more meaningful on an adjusted branch.
    ds.drop(["out1", "out2"], check=False)
    # --assume-ready affects both explicitly specified and automatic outputs.
    res = ds.rerun(assume_ready="outputs")
    assert_not_in_results(res, action="remove")
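The assume_ready switch used here for rerun() is also available on run() (see Example #26 further down); a minimal sketch of that spelling, assuming the DataLad Python API used throughout these examples and a hypothetical dataset location:

from datalad.api import Dataset

ds = Dataset("/tmp/assume-ready-demo").create()  # hypothetical path
(ds.pathobj / "f1").write_text("f1\n")
ds.save()
# declare the output as already taken care of, skipping the unlock/remove
# handling that run() would otherwise perform for declared outputs
ds.run(["touch", "out1"], outputs=["out1"], assume_ready="outputs")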
Example #2
def test_basic_aggregate(path=None):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we will first aggregate the middle dataset on its own; this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.drop('subsub', what='all', reckless='kill', recursive=True)
    # now we should be able to re-aggregate metadata, and lose nothing
    # because we can aggregate the aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for the aggregate query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree, and lose no metadata
    base.drop('sub', what='all', reckless='kill', recursive=True)
    assert (not sub.is_installed())
    assert_repo_status(base.path)
    # same result for the aggregate query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example #3
def test_report_absent_keys(path):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on worktree of HEAD ref
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', check=False)
    # does not change a thing, except the key is gone
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
Example #4
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    ok_clean_git(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True, update_mode='all', force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(
        objs,
        list(sorted(base.repo.find(objpath)))
    )
Example #5
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(op.join(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(op.join(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(op.join('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # first a quick check that an unsupported 'into' mode causes an exception
    assert_raises(ValueError,
                  base.meta_aggregate,
                  recursive=True,
                  into='spaceship')
    # now for real
    base.meta_aggregate(recursive=True, into='all')
    assert_repo_status(base.path)
    objpath = op.join('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    assert_repo_status(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.meta_aggregate(recursive=True, into='all', force='fromscratch')
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(objs, list(sorted(base.repo.find(objpath))))
Example #6
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    assert_repo_status(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True,
                            update_mode='all',
                            force_extraction=True)
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(objs, list(sorted(base.repo.find(objpath))))
Example #7
def test_reaggregate_with_unavailable_objects(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    # weird that it comes out as a string...
    objs = [o for o in sorted(base.repo.find(objpath).split('\n')) if o]
    # we have 3x2 metadata sets (dataset/files) under annex
    eq_(len(objs), 6)
    eq_(all(base.repo.file_has_content(objs)), True)
    # drop all object content
    base.drop(objs, check=False)
    eq_(all(base.repo.file_has_content(objs)), False)
    ok_clean_git(base.path)
    # now re-aggregate, the state hasn't changed, so the file names will
    # be the same
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(all(base.repo.file_has_content(objs)), True)
    # and there are no new objects
    eq_(objs, [o for o in sorted(base.repo.find(objpath).split('\n')) if o])
Example #8
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted',
                   'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #9
def test_drop_file_need_nocheck(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    with assert_raises(IncompleteResultsError) as cme:
        ds.drop("foo")
    # The --force suggestion from git-annex-drop is translated to --reckless.
    assert_in("--reckless", str(cme.value))
    assert_status("ok", ds.drop("foo", reckless='kill', on_failure="ignore"))
Example #10
def test_drop_file_need_nocheck(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    with assert_raises(IncompleteResultsError) as cme:
        ds.drop("foo")
    # The --force suggestion from git-annex-drop is translated to --nocheck.
    assert_in("--nocheck", str(cme.exception))
    assert_status("ok", ds.drop("foo", check=False, on_failure="ignore"))
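Examples #9 and #10 exercise the same behaviour against two generations of the drop API; as a quick reference, a minimal sketch contrasting the two spellings that appear above (dataset and file name are placeholders):

# older interface: disable git-annex's availability check
ds.drop("foo", check=False, on_failure="ignore")
# newer interface: the same intent is expressed via the reckless parameter
ds.drop("foo", reckless="kill", on_failure="ignore")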
Example #11
def test_create_raises(path=None, outside_path=None):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, annex=False, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message=
        'will not create a dataset in a non-empty directory, use `--force` option to ignore'
    )
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))
    obscure_ds = u"ds-" + OBSCURE_FILENAME
    # create a sub:
    ds.create(obscure_ds)
    # fail when doing it again
    assert_in_results(ds.create(obscure_ds, **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.drop(obscure_ds, what='all', reckless='kill', recursive=True)
    assert_in(obscure_ds, ds.subdatasets(state='absent',
                                         result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    assert_in_results(ds.create(obscure_ds, **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))
    assert_in_results(ds.create(op.join(obscure_ds, 'subsub'), **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / obscure_ds), ds.path))
    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.save()
    assert_in_results(
        ds.create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [str(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
Example #12
def test_report_absent_keys(path=None):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on worktree of HEAD ref
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', reckless='kill')
    # does not change a thing, except the key is gone
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
    # make sure files with URL keys are correctly reported:
    from datalad.conftest import test_http_server
    remote_file_name = 'imaremotefile.dat'
    local_file_name = 'mehasurlkey'
    (Path(test_http_server.path) / remote_file_name).write_text("weee")
    remote_file_url = f'{test_http_server.url}/{remote_file_name}'
    # we need to get a file with a URL key and check its local availability
    ds.repo.call_annex(
        ['addurl', '--relaxed', remote_file_url, '--file', local_file_name])
    ds.save("URL keys!")
    # should not be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], False)
    ds.get(local_file_name)
    # should be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], True)
Example #13
def test_clone_unborn_head(path):
    ds_origin = Dataset(op.join(path, "a")).create()
    repo = ds_origin.repo
    managed = repo.is_managed_branch()

    # The setup below is involved, mostly because it's accounting for adjusted
    # branches. The scenario itself isn't so complicated, though:
    #
    #   * a checked out default branch with no commits
    #   * a (potentially adjusted) "abc" branch with commits.
    #   * a (potentially adjusted) "chooseme" branch whose tip commit has a
    #     more recent commit than any in "abc".
    (ds_origin.pathobj / "foo").write_text("foo content")
    ds_origin.save(message="foo")
    for res in repo.for_each_ref_(fields="refname"):
        ref = res["refname"]
        if DEFAULT_BRANCH in ref:
            repo.update_ref(ref.replace(DEFAULT_BRANCH, "abc"), ref)
            repo.call_git(["update-ref", "-d", ref])
    repo.update_ref("HEAD",
                    "refs/heads/{}".format(
                        "adjusted/abc(unlocked)" if managed else "abc"),
                    symbolic=True)
    abc_ts = int(repo.format_commit("%ct"))
    repo.call_git(["checkout", "-b", "chooseme", "abc~1"])
    if managed:
        repo.adjust()
    (ds_origin.pathobj / "bar").write_text("bar content")
    with set_date(abc_ts + 1):
        ds_origin.save(message="bar")
    # Make the git-annex branch the most recently updated ref so that we test
    # that it is skipped.
    with set_date(abc_ts + 2):
        ds_origin.drop("bar", check=False)
    ds_origin.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])

    ds = clone(ds_origin.path, op.join(path, "b"))
    # We landed on the branch with the most recent commit, ignoring the
    # git-annex branch.
    branch = ds.repo.get_active_branch()
    eq_(ds.repo.get_corresponding_branch(branch) or branch,
        "chooseme")
    eq_(ds_origin.repo.get_hexsha("chooseme"),
        ds.repo.get_hexsha("chooseme"))
    # In the context of this test, the clone should be on an adjusted branch if
    # the source landed there initially because we're on the same file system.
    eq_(managed, ds.repo.is_managed_branch())
Example #14
def test_get_file_annexinfo(path=None):
    ds = Dataset(path).create(force=True)
    ds.save('ingit.txt', to_git=True)
    ds.save()
    # have some content-less component for testing
    ds.drop(ds.pathobj / 'dir1', reckless='kill')

    repo = ds.repo
    # only handles a single file at a time
    assert_raises(ValueError, repo.get_file_annexinfo, repo.pathobj / 'dir2')
    # however, what matters functionally is only that there is a single file
    # to report on, not that the exact query path matches; the matching path
    # is included in the report
    assert_equal(repo.pathobj / 'dir1' / 'dropped',
                 repo.get_file_annexinfo(repo.pathobj / 'dir1')['path'])

    # does not raise on a non-annex file; instead it returns no properties
    assert_equal(repo.get_file_annexinfo('ingit.txt'), {})

    # but it does raise on a path that doesn't exist
    assert_raises(NoSuchPathError, repo.get_file_annexinfo, 'nothere')

    # check return properties for utility
    props = repo.get_file_annexinfo('inannex.txt')
    # to replace get_file_backend()
    assert_equal(props['backend'], 'MD5E')
    # to replace get_file_key()
    assert_equal(props['key'], 'MD5E-s7--3b158c5b0a18c247ebad28c09fc3e180.txt')
    # for size reporting
    assert_equal(props['bytesize'], 7)
    # all records have a pathobj
    assert_equal(props['path'], repo.pathobj / 'inannex.txt')
    # test if `eval_availability` has desired effect
    assert_not_in('has_content', props)

    # extended set of properties, after more expensive availability check
    props = repo.get_file_annexinfo('inannex.txt', eval_availability=True)
    # to replace file_has_content()
    assert_equal(props['has_content'], True)
    # to replace get_contentlocation()
    assert_equal(Path(props['objloc']).read_text(), 'inannex')

    # make sure has_content is not always True
    props = repo.get_file_annexinfo(ds.pathobj / 'dir1' / 'dropped',
                                    eval_availability=True)
    assert_equal(props['has_content'], False)
    assert_not_in('objloc', props)
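As the assertions above indicate, get_file_annexinfo() can stand in for several older single-purpose helpers; a condensed sketch of that pattern, assuming a repo object and file as in the example:

props = repo.get_file_annexinfo('inannex.txt', eval_availability=True)
backend = props['backend']                  # replaces get_file_backend()
key = props['key']                          # replaces get_file_key()
size = props['bytesize']                    # size reporting
present = props.get('has_content', False)   # replaces file_has_content()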
Example #15
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
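The sibling's wanted expression can also be adjusted after create_sibling(); a minimal sketch using the same set_preferred_content() call as at the end of the example above (remote name 'target' as assumed there):

# restrict the sibling to content without distribution restrictions ...
src.repo.set_preferred_content(
    'wanted', 'not metadata=distribution-restrictions=*', remote='target')
# ... and later lift the restriction again
src.repo.set_preferred_content('wanted', '', remote='target')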
Example #16
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
Example #17
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none', 'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE), 'autoenable=true'
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')), path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get the change saved, in order for the drop
    # below to work properly without getting into reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have an special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # now replace the file in dest with different content at the same path;
    # again, must call with a dataset to get the change saved, so that the
    # drop below works properly
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now getting the "same path" yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
Example #18
def test_copy_file_into_nonannex(workdir):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', check=False)

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
Example #19
def test_unlock_directory(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(text_type(dirpath / "a"), check=False)
    assert_false(ds.repo.file_has_content(text_type(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res,
                          action="unlock",
                          path=text_type(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res,
                              action="unlock",
                              path=text_type(dirpath_abs / "b"))
    else:
        assert_in_results(res,
                          action="unlock",
                          status="ok",
                          path=text_type(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock",
                      status="impossible",
                      path=text_type(dirpath_abs / "a"))
Example #20
def test_copy_file_nourl(serv_path=None, orig_path=None, tst_path=None):
    """Tests availability transfer to normal git-annex remote"""
    # prep source dataset that will have the file content
    srv_ds = Dataset(serv_path).create()
    (srv_ds.pathobj / 'myfile.dat').write_text('I am content')
    (srv_ds.pathobj / 'noavail.dat').write_text('null')
    srv_ds.save()
    srv_ds.drop('noavail.dat', reckless='kill')
    # make an empty superdataset, with the test dataset as a subdataset
    orig_ds = Dataset(orig_path).create()
    orig_ds.clone(source=serv_path, path='serv')
    assert_repo_status(orig_ds.path)
    # now copy the test file into the superdataset
    no_avail_file = orig_ds.pathobj / 'serv' / 'noavail.dat'
    assert_in_results(
        orig_ds.copy_file(no_avail_file, on_failure='ignore'),
        status='impossible',
        message='no known location of file content',
        path=str(no_avail_file),
    )
Example #21
def test_report_absent_keys(path):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on worktree of HEAD ref
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(
                paths=['dummy'],
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                paths=['dummy'],
                eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', check=False)
    # does not change a thing, except the key is gone
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(
                paths=['dummy'],
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                eval_availability=True),
            ds.repo.get_content_annexinfo(
                ref='HEAD',
                paths=['dummy'],
                eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
Example #22
def test_clean_subds_removal(path=None):
    ds = Dataset(path).create()
    subds1 = ds.create('one')
    subds2 = ds.create('two')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['one', 'two'])
    assert_repo_status(ds.path)
    # now kill one
    res = ds.remove('one', reckless='availability', result_xfm=None)
    # subds1 got uninstalled, and ds got the removal of subds1 saved
    assert_result_count(res,
                        1,
                        path=subds1.path,
                        action='uninstall',
                        status='ok')
    assert_result_count(res, 1, path=subds1.path, action='remove', status='ok')
    assert_result_count(res, 1, path=ds.path, action='save', status='ok')
    ok_(not subds1.is_installed())
    assert_repo_status(ds.path)
    # two must remain
    eq_(ds.subdatasets(result_xfm='relpaths'), ['two'])
    # one is gone
    nok_(subds1.pathobj.exists())
    # and now again, but this time remove something that is not installed
    ds.create('three')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ds.drop('two', what='all', reckless='availability')
    assert_repo_status(ds.path)
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    nok_(subds2.is_installed())
    # orderly empty mountpoint is maintained
    ok_(subds2.pathobj.exists())
    res = ds.remove('two', reckless='availability')
    assert_in_results(res, path=str(ds.pathobj / 'two'), action='remove')
    assert_repo_status(ds.path)
    # subds2 was already uninstalled, now ds got the removal of subds2 saved
    nok_(subds2.pathobj.exists())
    eq_(ds.subdatasets(result_xfm='relpaths'), ['three'])
Example #23
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(
        ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
Example #24
def test_dry_run(path=None):
    ds = Dataset(path).create(force=True)

    # The dataset is reported as dirty, and the custom result renderer relays
    # that to the default renderer.
    with swallow_outputs() as cmo:
        with assert_raises(IncompleteResultsError):
            ds.run("blah ", dry_run="basic")
        assert_in("run(impossible)", cmo.out)
        assert_not_in("blah", cmo.out)

    ds.save()

    # unknown dry-run mode
    assert_raises(ValueError, ds.run, 'blah', dry_run='absurd')

    with swallow_outputs() as cmo:
        ds.run("blah ", dry_run="basic")
        assert_in("Dry run", cmo.out)
        assert_in("location", cmo.out)
        assert_in("blah", cmo.out)
        assert_not_in("expanded inputs", cmo.out)
        assert_not_in("expanded outputs", cmo.out)

    with swallow_outputs() as cmo:
        ds.run("blah {inputs} {outputs}",
               dry_run="basic",
               inputs=["fo*"],
               outputs=["b*r"])
        assert_in('blah "foo" "bar"' if on_windows else "blah foo bar",
                  cmo.out)
        assert_in("expanded inputs", cmo.out)
        assert_in("['foo']", cmo.out)
        assert_in("expanded outputs", cmo.out)
        assert_in("['bar']", cmo.out)

    # Just the command.
    with swallow_outputs() as cmo:
        ds.run("blah ", dry_run="command")
        assert_not_in("Dry run", cmo.out)
        assert_in("blah", cmo.out)
        assert_not_in("inputs", cmo.out)

    # The output file wasn't unlocked.
    assert_repo_status(ds.path)

    # Subdataset handling

    subds = ds.create("sub")
    (subds.pathobj / "baz").write_text("z")
    ds.save(recursive=True)

    # If a subdataset is installed, it works as usual.
    with swallow_outputs() as cmo:
        ds.run("blah {inputs}", dry_run="basic", inputs=["sub/b*"])
        assert_in('blah "sub\\baz"' if on_windows else 'blah sub/baz', cmo.out)

    # However, a dry run will not do the install/reglob procedure.
    ds.drop("sub", what='all', reckless='kill', recursive=True)
    with swallow_outputs() as cmo:
        ds.run("blah {inputs}", dry_run="basic", inputs=["sub/b*"])
        assert_in("sub/b*", cmo.out)
        assert_not_in("baz", cmo.out)
Example #25
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)                    # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))                           # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))                                     # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))                                               # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')              # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))                        # add the non-dataset git repo to annex
    ds.add('dir')                                  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule  by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive
                )
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and deleted only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if it's updated in its nodes sublist too; used by web-ui json (regression test)
                assert_equal(dsj['nodes'][0]['size']['total'], dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes'] if item['name'] in ('subdsfile.txt', 'subds')][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata already created
                # to verify sub-dataset metadata being picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] in ('subdsfile.txt', 'subds')
                    ][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE
                )
Example #26
def test_run_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    adjusted = repo.is_managed_branch()

    # --assume-ready=inputs

    (repo.pathobj / "f1").write_text("f1")
    ds.save()

    def cat_cmd(fname):
        return [
            sys.executable, "-c",
            "import sys; print(open(sys.argv[-1]).read())", fname
        ]

    assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]),
                      action="get",
                      type="file")
    # Same thing, but without the get() call.
    assert_not_in_results(ds.run(cat_cmd("f1"),
                                 inputs=["f1"],
                                 assume_ready="inputs"),
                          action="get",
                          type="file")

    ds.drop("f1", check=False)
    if not adjusted:
        # If the input is not actually ready, the command will fail.
        with assert_raises(CommandError):
            ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs")

    # --assume-ready=outputs

    def unlink_and_write_cmd(fname):
        # This command doesn't care whether the output file is unlocked because
        # it removes it ahead of time anyway.
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); "
            "open(f, mode='w').write(str(sys.argv))", fname
        ]

    (repo.pathobj / "f2").write_text("f2")
    ds.save()

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"])
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    # Same thing, but without the unlock() call.
    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 assume_ready="outputs")
    assert_not_in_results(res, action="unlock", type="file")

    # --assume-ready=both

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"])
    assert_in_results(res, action="get", type="file")
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")

    res = ds.run(unlink_and_write_cmd("f2"),
                 outputs=["f2"],
                 inputs=["f2"],
                 assume_ready="both")
    assert_not_in_results(res, action="get", type="file")
    assert_not_in_results(res, action="unlock", type="file")
Example #27
def test_run_inputs_outputs(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"])

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))

    # Rerunning the commit will also get the input file.
    ds.repo.drop("test-annex.dat", options=["--force"])
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("test-annex.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.repo.head.commit.message)
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop("a.dat", options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("echo blah", outputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.repo.head.commit.message)
    assert_in("b.dat", ds.repo.repo.head.commit.message)

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
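The test above exercises most of run()'s input/output plumbing; a condensed sketch of the core pattern it relies on (file names and command are placeholders, API as used above):

ds.run("cat a.dat b.dat > out.dat",
       inputs=["*.dat"],     # matching inputs are retrieved before execution
       outputs=["out.dat"],  # present outputs are unlocked, absent ones removed
       expand="both")        # store input/output globs in expanded form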
Example #28
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    ds.rerun()
    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
Example #29
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.format_commit("%B"))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(
        ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report does not include the unchanged files that
    # would appear in `datalad -f json diff` output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other config
            # file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))

    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    # The last report entry corresponds to the current HEAD commit.
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
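
# A hypothetical sketch of walking a report-mode rerun, as the test above does,
# to list the files each replayed run commit touched; it assumes an existing
# dataset at a made-up path with recorded run commits.
from datalad.api import Dataset

demo = Dataset("/tmp/existing-dataset-with-run-history")
report = demo.rerun(since="", report=True, return_type="list")
for entry in report:
    if entry.get("rerun_action") == "run":
        # each "diff" record is a status-like dict with (at least) a "path" key
        touched = [r["path"] for r in entry["diff"]]
        print(entry["commit"], touched)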
Example #30
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)  # create git repo
    git.add(opj(topdir, 'dir', 'subgit',
                'fgit.txt'))  # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir',
                  'subgit'))  # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))  # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'),
               options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')  # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Check that there is no crash if one of the files is available only via a
    # relaxed URL, in which case no size can be picked up
    ds.repo.add_url_to_file('fromweb',
                            topurl + '/noteventhere',
                            options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # the subdataset's JSON file should remain afterwards only when
                # json='file' and recursive=True
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(topdir,
                               json=state,
                               all_=all_,
                               recursive=recursive)
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # the root's JSON file should exist only when json='file',
                # regardless of all_ and recursive
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata JSON files only when
                # json='file' and all_=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath),
                             (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden', ), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check that it is also updated in the 'nodes' sublist
                # (used by the web-UI JSON; regression test)
                assert_equal(dsj['nodes'][0]['size']['total'],
                             dsj['size']['total'])

                # check the reported size of the annexed file 'subdsfile.txt'
                subds = [
                    item for item in dsj['nodes']
                    if item['name'] == 'subdsfile.txt'
                ][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit of guesswork added later on by yoh
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # Re-run a non-recursive traversal after the subdataset metadata has
                # already been created, to verify that subdataset metadata is picked
                # up from its metadata file in that case.
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] == 'subdsfile.txt'
                    ][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(topds_nodes['fromweb']['size']['total'],
                             UNKNOWN_SIZE)
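
# For illustration only: a standalone restatement of the metadata-path scheme used
# by the get_metahash/get_metapath helpers above. Each node's JSON file lives under
# .git/datalad/metadata/ and is named by the MD5 hex digest of the node's relative
# path ('/' for the dataset root).
import hashlib
import os.path as op

def metadata_file(ds_path, *relpath):
    name = op.join(*relpath) if relpath else '/'
    digest = hashlib.md5(name.encode('utf-8')).hexdigest()
    return op.join(ds_path, '.git', 'datalad', 'metadata', digest)

print(metadata_file('/tmp/ds'))                   # root node
print(metadata_file('/tmp/ds', 'dir', 'subdir'))  # a nested directory node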