Example #1
def test_bundle_invariance(path):
    remote_url = 'ssh://localhost'
    manager = SSHManager()
    testfile = Path(path) / 'dummy'
    for flag in (True, False):
        assert_false(testfile.exists())
        ssh = manager.get_connection(remote_url, use_remote_annex_bundle=flag)
        ssh('cd .>{}'.format(text_type(testfile)))
        ok_(testfile.exists())
        testfile.unlink()
Example #2
def test_ssh_copy(sourcedir, sourcefile1, sourcefile2):

    remote_url = 'ssh://localhost:22'
    manager = SSHManager()
    ssh = manager.get_connection(remote_url)

    # write to obscurely named file in sourcedir
    obscure_file = opj(sourcedir, get_most_obscure_supported_name())
    with open(obscure_file, 'w') as f:
        f.write("three")

    # copy tempfile list to remote_url:sourcedir
    sourcefiles = [sourcefile1, sourcefile2, obscure_file]
    ssh.put(sourcefiles, opj(remote_url, sourcedir))
    # docs promise that connection is auto-opened
    ok_(ssh.is_open())

    # recursive copy tempdir to remote_url:targetdir
    targetdir = sourcedir + '.c opy'
    ssh.put(sourcedir, opj(remote_url, targetdir),
            recursive=True, preserve_attrs=True)

    # check if sourcedir was copied to remote_url:targetdir
    ok_(isdir(targetdir))
    # check if scp preserved source directory attributes
    # if source_mtime=1.12s, scp -p sets target_mtime = 1.0s, test that
    eq_(getmtime(targetdir), int(getmtime(sourcedir)) + 0.0)

    # check if target files (and their content) exist in remote_url:targetdir;
    # this implies that file and recursive directory copying pass
    for targetfile, content in zip(sourcefiles, ["one", "two", "three"]):
        targetpath = opj(targetdir, targetfile)
        ok_(exists(targetpath))
        with open(targetpath, 'r') as fp:
            eq_(content, fp.read())

    # and now a quick smoke test for get
    togetfile = Path(targetdir) / '2|g>e"t.t&x;t'
    togetfile.write_text(text_type('something'))
    ssh.get(opj(remote_url, text_type(togetfile)), sourcedir)
    ok_((Path(sourcedir) / '2|g>e"t.t&x;t').exists())

    ssh.close()
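The mtime assertion above relies on scp -p keeping timestamps only with whole-second resolution. A minimal standalone sketch of that arithmetic (the values are illustrative assumptions, not taken from the test):

# scp -p effectively truncates fractional seconds of the source mtime
source_mtime = 1.12                      # hypothetical source directory mtime
target_mtime = float(int(source_mtime))  # what the copied directory ends up with
assert target_mtime == int(source_mtime) + 0.0  # mirrors the eq_() check above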
Example #3
def test_sibling_path_is_posix(basedir, otherpath):
    ds_source = Dataset(opj(basedir, "source")).create()
    # add remote with system native path
    ds_source.siblings(
        action="add",
        name="donotexist",
        url=otherpath,
        result_renderer=None)
    res = ds_source.siblings(
        action="query",
        name="donotexist",
        result_renderer=None,
        return_type='item-or-list')
    # path URL should come out POSIX as if `git clone` had configured it for origin
    # https://github.com/datalad/datalad/issues/3972
    eq_(res['url'], Path(otherpath).as_posix())
Example #4
def test_update_adjusted_incompatible_with_ff_only(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    maybe_adjust_repo(ds_clone.repo)

    assert_in_results(ds_clone.update(merge="ff-only", on_failure="ignore"),
                      action="update",
                      status="impossible")
    assert_in_results(ds_clone.update(on_failure="ignore"),
                      action="update",
                      status="ok")
Example #5
def test_push_matching(path):
    path = Path(path)
    ds = Dataset(path / "ds").create(force=True)
    ds.config.set('push.default', 'matching', where='local')
    ds.save()
    remote_ds = mk_push_target(ds,
                               'local',
                               str(path / 'dssibling'),
                               annex=True,
                               bare=False)
    # the fact that the next one even runs makes sure that we are in a better
    # place than https://github.com/datalad/datalad/issues/4888
    ds.push(to='local')
    # and we pushed the commit in the current branch
    eq_(remote_ds.get_hexsha(DEFAULT_BRANCH),
        ds.repo.get_hexsha(DEFAULT_BRANCH))
Example #6
def test_push_git_annex_branch_when_no_data(path):
    path = Path(path)
    ds = Dataset(path / "a").create()
    target = mk_push_target(ds,
                            "target",
                            str(path / "target"),
                            annex=False,
                            bare=True)
    (ds.pathobj / "f0").write_text("0")
    ds.save()
    ds.push(to="target", data="nothing")
    assert_in(
        "git-annex", {
            d["refname:strip=2"]
            for d in target.for_each_ref_(fields="refname:strip=2")
        })
Example #7
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = Path(src) / 'ds'
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds',
                   path,
                   result_xfm='datasets',
                   return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')
Example #8
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` rather than `git merge TARGET` is used on an
        # adjusted branch, so we don't give an error if TARGET can't be
        # determined.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout("master", options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
Example #9
def test_aggregate_aggregation(path):
    path = Path(path)
    ds = Dataset(path).create()
    sourceds = ds.create(path / 'source')
    origds = ds.create(path / 'source' / 'orig')
    # put a single (empty) file in origds to have some metadata-relevant
    # content
    payload = origds.pathobj / 'CONTENT'
    payload.write_text(u'')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate origds' metadata into sourceds, note the trailing slash
    sourceds.meta_aggregate('orig' + op.sep)
    # orig has no aggregates
    assert_status(
        'impossible',
        origds.meta_dump(reporton='aggregates',
                         recursive=True,
                         on_failure='ignore'))
    # but sourceds has exactly one record -- that of origds
    res = sourceds.meta_dump(reporton='aggregates',
                             recursive=True,
                             on_failure='ignore')
    assert_result_count(res, 1)
    assert_result_count(res, 1, path=origds.path)
    # now we change the payload file to have some metadata-change signal
    # and save the entire hierarchy
    payload.unlink()
    payload.write_text(u'BIGONE')
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # FOR REAL: aggregate the aggregate on 'orig' from 'source';
    # this must not re-aggregate 'orig' (note: no trailing slash)
    ds.meta_aggregate(op.join('source', 'orig'))
    # the freshly aggregated, but outdated metadata still reports
    # a zero bytesize for the payload file
    eq_(
        ds.meta_dump(payload, reporton='files')[0]['metadata']['metalad_core']
        ['contentbytesize'], 0)
    # and now with actual aggregation from orig
    ds.meta_aggregate(op.join('source', 'orig') + op.sep)
    # picks up the new size
    eq_(
        ds.meta_dump(payload, reporton='files')[0]['metadata']['metalad_core']
        ['contentbytesize'], 6)
Example #10
def test_merge_conflict(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    ds_src_s0 = ds_src.create("s0")
    ds_src_s1 = ds_src.create("s1")
    ds_src.save()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_s0 = Dataset(path / "clone" / "s0")
    ds_clone_s1 = Dataset(path / "clone" / "s1")

    (ds_src.pathobj / "foo").write_text("src content")
    ds_src.save(to_git=True)

    (ds_clone.pathobj / "foo").write_text("clone content")
    ds_clone.save(to_git=True)

    # Top-level merge failure
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error")
    assert_in_results(res, action="update", status="error")
    # Deal with the conflicts. Note that save() won't handle this gracefully
    # because it will try to commit with a pathspec, which git doesn't allow
    # during a merge.
    ds_clone.repo.call_git(["checkout", "--theirs", "--", "foo"])
    ds_clone.repo.call_git(["add", "--", "foo"])
    ds_clone.repo.call_git(["commit", "--no-edit"])
    assert_repo_status(ds_clone.path)

    # Top-level and subdataset merge failure
    (ds_src_s0.pathobj / "foo").write_text("src s0 content")
    (ds_src_s1.pathobj / "foo").write_text("no conflict")
    ds_src.save(recursive=True, to_git=True)

    (ds_clone_s0.pathobj / "foo").write_text("clone s0 content")
    ds_clone.save(recursive=True, to_git=True)
    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_result_count(res, 2, action="merge", status="error")
    assert_result_count(res, 2, action="update", status="error")
    assert_in_results(res, action="merge", status="ok", path=ds_clone_s1.path)
    assert_in_results(res, action="update", status="ok", path=ds_clone_s1.path)
    # No saving happens if there's a top-level conflict.
    assert_repo_status(ds_clone.path,
                       modified=[ds_clone_s0.path, ds_clone_s1.path])
Example #11
def test_asyncio_forked(temp):
    # temp will be used by the child to communicate whether it succeeded or not
    temp = Path(temp)
    runner = Runner()
    import os
    try:
        pid = os.fork()
    except BaseException as exc:
        # .fork availability is "Unix", and there are cases where it is "not supported"
        # so we will just skip if no forking is possible
        raise SkipTest(f"Cannot fork: {exc}")
    # if it does not fail (in the original or in a fork) -- we are good
    if sys.version_info < (3, 8) and pid != 0:
        # for some reason it is crucial to sleep a little (but 0.001 is not enough)
        # in the master process with older pythons or it takes forever to make the child run
        sleep(0.1)
    try:
        runner.run([sys.executable, '--version'], protocol=StdOutCapture)
        if pid == 0:
            temp.write_text("I rule")
    except:
        if pid == 0:
            temp.write_text("I suck")
    if pid != 0:
        # parent: look after the child
        t0 = time()
        try:
            while not temp.exists() or temp.stat().st_size < 6:
                if time() - t0 > 5:
                    raise AssertionError(
                        "Child process did not create a file we expected!")
        finally:
            # kill the child
            os.kill(pid, signal.SIGTERM)
        # see if it was a good one
        eq_(temp.read_text(), "I rule")
    else:
        # sleep long enough so the parent just kills me, the kid, before I continue doing bad deeds
        sleep(10)
Example #12
def check_create_path_semantics(cwd, create_ds, path_arg, base_path,
                                other_path):
    ds = Dataset(base_path).create()
    os.makedirs(op.join(ds.path, 'some'))
    target_path = ds.pathobj / "some" / "what" / "deeper"
    with chpwd(other_path if cwd == 'elsewhere' else base_path if cwd ==
               'parentds' else str(ds.pathobj / 'some') if cwd ==
               'subdir' else str(Path.cwd())):
        subds = create(
            dataset=ds.path if create_ds == 'abspath' else
            str(ds.pathobj.relative_to(cwd)) if create_ds == 'relpath' else
            ds if create_ds == 'instance' else create_ds,
            path=str(target_path) if path_arg == 'abspath' else
            str(target_path.relative_to(ds.pathobj))
            if path_arg == 'relpath' else op.join('what', 'deeper')
            if path_arg == 'subdir_relpath' else path_arg)
        eq_(subds.pathobj, target_path)
Example #13
def check_create_initopts_form(form, path=None):
    path = Path(path)

    template_dir = path / "templates"
    template_dir.mkdir()
    (template_dir / "foo").write_text("")

    forms = {
        "list": [f"--template={template_dir}"],
        "dict": {
            "template": str(template_dir)
        }
    }

    ds = Dataset(path / "ds")
    ds.create(initopts=forms[form])
    ok_exists(ds.repo.dot_git / "foo")
Example #14
def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` is used on an adjusted branch, but this error
        # depends on `git merge TARGET` being used.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the remote
    # and local version, but the parent dataset does not have a conflict
    # because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res,
                      action="merge",
                      status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res,
                      action="merge",
                      status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok", path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(
        ds_clone_sub_conflict.repo.call_git(
            ["ls-files", "--unmerged", "--", "foo"]).strip())
Example #15
def test_get_open_files(p=None):
    pobj = Path(p)
    skip_if_no_module('psutil')
    eq_(get_open_files(p), {})
    f1 = pobj / '1'
    subd = pobj / 'd'
    with f1.open() as f:
        # since lsof does not care about the PWD env var etc, paths
        # will not contain symlinks, so we better realpath them
        # all before comparison
        eq_(get_open_files(p, log_open=40)[str(f1.resolve())].pid, os.getpid())

    assert not get_open_files(str(subd))

    if on_windows:
        # the remainder of the test assumes a certain performance.
        # However, on Windows get_open_files() can be very slow
        # (e.g. the first invocation in this test above can easily
        # take 30-50s). It is not worth slowing the tests to
        # accommodate this issue, given we have already tested proper
        # functioning in principle above.
        return

    # if we start a process within that directory, we should get informed
    from subprocess import (
        PIPE,
        Popen,
    )
    from time import time
    t0 = time()
    proc = Popen([
        sys.executable, '-c',
        r'import sys; sys.stdout.write("OK\n"); sys.stdout.flush();'
        r'import time; time.sleep(10)'
    ],
                 stdout=PIPE,
                 cwd=str(subd))
    # Assure that it started and we read the OK
    eq_(ensure_unicode(proc.stdout.readline().strip()), u"OK")
    assert time() - t0 < 5  # that we were not stuck waiting for process to finish
    eq_(get_open_files(p)[str(subd.resolve())].pid, proc.pid)
    eq_(get_open_files(subd)[str(subd.resolve())].pid, proc.pid)
    proc.terminate()
    assert_equal(get_open_files(str(subd)), {})
Example #16
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none', 'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE), 'autoenable=true'
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')), path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get change saved, in order for drop
    # below to work properly without getting in reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # now replace the file in dest with different content at the same path;
    # must call with a dataset to get the change saved, in order for the
    # drop below to work properly
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now gets the "same path" but yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
Example #17
def test_create_osf_simple(path):

    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(title="CI dl-create",
                                           name="osf-storage")

    assert_result_count(create_results, 2, status='ok', type='dataset')

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("project={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_project(osf.session, create_results[0]['id'])
Example #18
def test_guess_dot_git(path=None, url=None, tdir=None, *, annex):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', git=not annex)
    repo.commit()

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path).run(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(installed.pathobj.resolve(), Path(tdir).resolve())
    ok_(exists(tdir))
    assert_repo_status(tdir, annex=annex)
Example #19
def test_gitannex(osf_id, dspath):
    from datalad.cmd import (GitRunner, WitlessRunner)
    dspath = Path(dspath)

    ds = Dataset(dspath).create()

    # add remote parameters here
    init_remote_opts = ["project={}".format(osf_id)]

    # add special remote
    init_opts = common_init_opts + init_remote_opts
    ds.repo.init_remote('osfproject', options=init_opts)

    # run git-annex-testremote
    # note that we don't want to capture output. If something goes wrong we
    # want to see it in the test build's output log.
    WitlessRunner(cwd=dspath, env=GitRunner.get_git_environ_adjusted()).run(
        ['git', 'annex', 'testremote', 'osfproject', "--fast"])
Example #20
def test_global_config():

    # from within tests, global config should be read from faked $HOME (see
    # setup_package) or from GIT_CONFIG_GLOBAL

    if 'GIT_CONFIG_GLOBAL' in os.environ.keys():
        glb_cfg_file = Path(os.environ.get('GIT_CONFIG_GLOBAL'))
    else:
        glb_cfg_file = Path(os.path.expanduser('~')) / '.gitconfig'
    assert any(
        glb_cfg_file.samefile(Path(p)) for p in dl_cfg._stores['git']['files'])
    assert_equal(dl_cfg.get("user.name"), "DataLad Tester")
    assert_equal(dl_cfg.get("user.email"), "*****@*****.**")
Example #21
    def custom_result_renderer(res, **kwargs):
        if (res['status'] != 'ok' or res['action']
                not in ('get_configuration', 'dump_configuration')):
            if 'message' not in res and 'name' in res:
                suffix = '={}'.format(res['value']) if 'value' in res else ''
                res['message'] = '{}{}'.format(res['name'], suffix)
            default_result_renderer(res)
            return
        # TODO source
        from datalad.ui import ui
        name = res['name']
        if res['action'] == 'dump_configuration':
            for key in ('purpose', 'description'):
                s = res.get(key)
                if s:
                    ui.message('\n'.join(
                        wrap(
                            s,
                            initial_indent='# ',
                            subsequent_indent='# ',
                        )))

        if kwargs.get('recursive', False):
            have_subds = res['path'] != res['refds']
            # we need to mark up from which dataset results are reported
            prefix = '<ds>{}{}:'.format(
                '/' if have_subds else '',
                Path(res['path']).relative_to(res['refds']).as_posix()
                if have_subds else '',
            )
        else:
            prefix = ''

        if kwargs.get('action', None) == 'dump':
            ui.message('{}{}={}'.format(
                prefix,
                ac.color_word(name, ac.BOLD),
                res['value'] if res['value'] is not None else '',
            ))
        else:
            ui.message('{}{}'.format(
                prefix,
                res['value'] if res['value'] is not None else '',
            ))
Example #22
def test_clone_report_permission_issue(tdir):
    pdir = Path(tdir) / 'protected'
    pdir.mkdir()
    # make it read-only
    pdir.chmod(0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
Example #23
def _test_setup_ds_in_store(io_cls, io_args, store):
    io = io_cls(*io_args)
    store = Path(store)
    # ATM create_ds_in_store doesn't care what kind of ID is provided
    dsid = "abc123456"

    ds_path = store / dsid[:3] / dsid[3:]  # store layout version 1
    version_file = ds_path / 'ria-layout-version'
    archives = ds_path / 'archives'
    objects = ds_path / 'annex' / 'objects'
    git_config = ds_path / 'config'

    # invalid store version:
    assert_raises(UnknownLayoutVersion, create_ds_in_store, io, store, dsid,
                  '1', 'abc')

    # invalid obj version:
    assert_raises(UnknownLayoutVersion, create_ds_in_store, io, store, dsid,
                  'abc', '1')

    # version 1
    create_store(io, store, '1')
    create_ds_in_store(io, store, dsid, '1', '1')
    for p in [ds_path, archives, objects]:
        assert_true(p.is_dir(), msg="Not a directory: %s" % str(p))
    for p in [version_file]:
        assert_true(p.is_file(), msg="Not a file: %s" % str(p))
    assert_equal(version_file.read_text(), "1\n")

    # conflicting version exists at target:
    assert_raises(ValueError, create_ds_in_store, io, store, dsid, '2', '1')

    # version 2
    # Note: The only difference between version 1 and 2 is supposed to be the
    #       key paths (dirhashlower vs mixed), which has nothing to do with
    #       the setup routine.
    rmtree(str(store))
    create_store(io, store, '1')
    create_ds_in_store(io, store, dsid, '2', '1')
    for p in [ds_path, archives, objects]:
        assert_true(p.is_dir(), msg="Not a directory: %s" % str(p))
    for p in [version_file]:
        assert_true(p.is_file(), msg="Not a file: %s" % str(p))
    assert_equal(version_file.read_text(), "2\n")
Example #24
def test_get_local_file_url_compatibility(path):
    # smoke test for file:// URL compatibility with other datalad/git/annex
    # pieces
    path = Path(path)
    ds1 = Dataset(path / 'ds1').create()
    ds2 = Dataset(path / 'ds2').create()
    testfile = path / 'testfile.txt'
    testfile.write_text('some')

    # compat with annex addurl
    ds1.repo.add_url_to_file(
        'test.txt', get_local_file_url(testfile, compatibility='git-annex'))

    # compat with git clone/submodule
    assert_status(
        'ok',
        ds1.clone(get_local_file_url(ds2.path, compatibility='git'),
                  result_xfm=None,
                  return_type='generator'))
Example #25
def test_push_custom_summary(path=None):
    path = Path(path)
    ds = Dataset(path / "ds").create()

    sib = mk_push_target(ds, "sib", str(path / "sib"), bare=False, annex=False)
    (sib.pathobj / "f1").write_text("f1")
    sib.save()

    (ds.pathobj / "f2").write_text("f2")
    ds.save()

    # These options are true by default and our tests usually run with a
    # temporary home, but set them to be sure.
    ds.config.set("advice.pushUpdateRejected", "true", scope="local")
    ds.config.set("advice.pushFetchFirst", "true", scope="local")
    with swallow_outputs() as cmo:
        ds.push(to="sib", result_renderer="default", on_failure="ignore")
        assert_in("Hints:", cmo.out)
        assert_in("action summary:", cmo.out)
Example #26
def collect_jsonld_metadata(dspath, res, nodes_by_context, contexts):
    """Sift through a metadata result and gather JSON-LD documents

    Parameters
    ----------
    dspath : str or Path
      Native absolute path of the dataset that shall be used to determine
      the relative path (name) of a file-result. This would typically be
      the path to the dataset that contains the file.
    res : dict
      Result dictionary as produced by `meta_extract()` or
      `meta_dump()`.
    nodes_by_context : dict
      JSON-LD documents are collected in this dict, using their context
      as keys.
    contexts : dict
      Holds a previously discovered context for any extractor.
    """
    if res['type'] == 'dataset':
        _native_metadata_to_graph_nodes(
            res['metadata'],
            nodes_by_context,
            contexts,
        )
    else:
        fmeta = res['metadata']
        # pull out a datalad ID from -core, if there is any
        fid = fmeta.get('metalad_core', {}).get('@id', None)
        _native_metadata_to_graph_nodes(
            fmeta,
            nodes_by_context,
            contexts,
            defaults={
                '@id': fid,
                # do not have a @type default here, it would
                # duplicate across all extractor records
                # let the core extractor deal with this
                #'@type': "DigitalDocument",
                # maybe we need something more fitting than
                # name
                'name': Path(res['path']).relative_to(dspath).as_posix(),
            },
        )
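A hypothetical usage sketch for collect_jsonld_metadata() (the Dataset instance ds and the exact call parameters are assumptions, not part of the original example): group the JSON-LD nodes of metadata results by their context.

# Sketch only: assumes ds is a datalad Dataset with aggregated metadata and
# that meta_dump() from datalad-metalad is available, as in the examples above.
nodes_by_context = {}
contexts = {}
for res in ds.meta_dump(recursive=True, on_failure='ignore',
                        return_type='generator'):
    if 'metadata' not in res:
        # skip results that carry no metadata (e.g. error records)
        continue
    collect_jsonld_metadata(ds.path, res, nodes_by_context, contexts)
# nodes_by_context now maps each JSON-LD @context to the nodes that use it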
Example #27
def test_copy_file_into_nonannex(workdir):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', check=False)

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
Example #28
def metadata_locator(fs_metadata=None,
                     path=None,
                     ds_path=None,
                     metadata_path=None):
    """path to metadata file of node associated with the fs_metadata dictionary

    Parameters
    ----------
    fs_metadata: dict
      Metadata json of a node
    path: str
      Path to directory of metadata to be rendered
    ds_path: str
      Path to dataset root
    metadata_path: str
      Path to metadata root. Calculated relative to ds_path

    Returns
    -------
    str
      path to metadata of current node
    """

    # use implicit paths unless paths explicitly specified
    # Note: usage of ds_path as if it was the Repo's path. Therefore use
    # realpath, since we switched to have symlinks resolved in repos but not in
    # datasets
    ds_path = str(Path(ds_path).resolve()) if ds_path else fs_metadata['repo']
    path = path or fs_metadata['path']
    metadata_path = metadata_path or '.git/datalad/metadata'
    # metadata directory tree location
    metadata_dir = opj(ds_path, metadata_path)
    # relative path of current directory wrt dataset root
    dir_path = relpath(path, ds_path) if isabs(path) else path
    # normalize to / -- TODO, switch to '.' which is now actually the name since path is relative in web meta?
    if dir_path in ('.', None, ''):
        dir_path = '/'
    # create md5 hash of current directory's relative path
    metadata_hash = hashlib.md5(dir_path.encode('utf-8')).hexdigest()
    # construct final path to metadata file
    metadata_file = opj(metadata_dir, metadata_hash)

    return metadata_file
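A hypothetical call to metadata_locator() (the paths are illustrative assumptions): compute where the metadata file for a subdirectory of a dataset would be located.

# Sketch only: illustrative paths, not taken from the original example.
md_file = metadata_locator(
    path='code/analysis',                # directory path relative to the dataset
    ds_path='/data/my-dataset',          # dataset root
    metadata_path='.git/datalad/metadata',
)
# -> '/data/my-dataset/.git/datalad/metadata/<md5 hexdigest of "code/analysis">'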
Example #29
def test_aggregate_into_top_no_extraction(path):

    path = Path(path)
    superds = Dataset(path).create()
    subds = superds.create(path / 'sub')
    # put a single small file in subds to have some metadata-relevant
    # content
    payload = subds.pathobj / 'CONTENT'
    payload.write_text(u'some')
    superds.save(recursive=True)
    assert_repo_status(superds.path)
    # have metadata aggregated in the subds

    res = subds.meta_aggregate()

    # FTR: Doing it again, yields extraction not needed:
    assert_result_count(subds.meta_aggregate(),
                        1,
                        action='meta_extract',
                        status='notneeded',
                        type='dataset')

    # update subds entry in super
    superds.save(recursive=True)
    # super has no metadata on sub's content
    assert_status('impossible', superds.meta_dump('sub/', on_failure='ignore'))
    # but subds has
    res = subds.meta_dump('.', on_failure='ignore')
    assert_result_count(res, 2)
    assert_result_count(res, 2, status='ok')
    assert_result_count(res, 1, type='dataset')
    assert_result_count(res, 1, type='file')
    # Now, aggregate into top
    res = superds.meta_aggregate('sub/', into='top')
    # super should now be able to report:
    assert_status('ok', superds.meta_dump('sub/', on_failure='ignore'))
    # Re-extraction should not be required:
    assert_result_count(res,
                        1,
                        action='meta_extract',
                        status='notneeded',
                        type='dataset')
Example #30
    def custom_result_renderer(res, **kwargs):  # pragma: more cover
        # Don't render things like 'status' for clean-info messages -
        # seems rather meaningless.

        from os import getcwd

        import datalad.support.ansi_colors as ac
        from datalad.interface.utils import generic_result_renderer
        from datalad.utils import Path

        if res['action'] == 'clean':
            # default renderer is just fine
            return generic_result_renderer(res)
        elif res['action'] != 'clean [dry-run]':
            # Result didn't come from within `clean`.
            # Should be handled elsewhere.
            return

        assert res['action'] == 'clean [dry-run]'

        if res.get('status', None) == 'ok':
            from datalad.ui import ui

            # when to render relative paths:
            #  1) if a dataset arg was given
            #  2) if CWD is the refds

            refds = res.get('refds', None)
            refds = refds if kwargs.get('dataset', None) is not None \
                             or refds == getcwd() else None
            path = res['path'] if refds is None \
                else str(Path(res['path']).relative_to(refds))

            ui.message(u"{path}: {message}".format(
                path=ac.color_word(path, ac.BOLD),
                message=(res['message'][0] % res['message'][1:] if isinstance(
                    res['message'], tuple) else res['message']) if res.get(
                        'message', None) else ''))

        else:
            # Any other status than 'ok' is reported the default way.
            return generic_result_renderer(res)
Example #31
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        repo = subds.repo

        annexed_files = repo.get_content_annexinfo(init=None)
        repo.get([str(f) for f in annexed_files])

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')
        ok_(all(str(f) in res for f in annexed_files))
        ainfo = repo.get_content_annexinfo(paths=annexed_files,
                                           eval_availability=True)
        ok_(all(not st["has_content"] for st in ainfo.values()))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
Example #32
def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the remote
    # and local version, but the parent dataset does not have a conflict
    # because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res,
                      action="merge",
                      status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res,
                      action="merge",
                      status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok", path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(
        ds_clone_sub_conflict.repo.call_git(
            ["ls-files", "--unmerged", "--", "foo"], read_only=True).strip())