Example #1
def check_basic_scenario(url, d=None):
    ds = Dataset(d).create()
    annex = ds.repo

    # TODO skip if no boto or no credentials
    get_test_providers(url)  # skips the test if the credentials are unknown

    # Let's try to add some file which we should have access to
    ds.download_url(url)
    ds.save()

    # git-annex got a fix where it stopped replacing '-' in the middle of the
    # filename with '_'.  Cater to developers who might run an intermediate
    # version that is not easy to compare -- just check that there is exactly
    # one file and that it matches what we expect outside of the development
    # version range:
    filenames = glob.glob(op.join(d, '3versions[-_]allversioned.txt'))
    eq_(len(filenames), 1)
    filename = op.basename(filenames[0])
    if external_versions['cmd:annex'] < '8.20200501':
        assert_in('_', filename)
    # Dated after the fix in 8.20200501-53-gcabbc91b1
    elif external_versions['cmd:annex'] >= '8.20200512':
        assert_in('-', filename)
    else:
        pass  # either of those is ok

    whereis1 = annex.whereis(filename, output='full')
    eq_(len(whereis1), 2)  # here and datalad
    annex.drop(filename)

    whereis2 = annex.whereis(filename, output='full')
    eq_(len(whereis2), 1)  # datalad

    # make sure that there are no "hidden" error messages, despite the
    # whereis command succeeding
    # https://github.com/datalad/datalad/issues/6453#issuecomment-1047533276
    from datalad.runner import StdOutErrCapture

    # we need to swallow logs since if DATALAD_LOG_LEVEL is set low, we
    # would get all the git-annex debug output in stderr
    with swallow_logs(new_level=logging.INFO) as cml:
        out = annex._call_annex(['whereis'], protocol=StdOutErrCapture)
        eq_(out['stderr'].strip(), '')

    # providing a bogus URL that we cannot access should not pollute the output
    with assert_raises(CommandError) as cme:
        annex.add_url_to_file('bogus', url + '_bogus')
    assert_in('addurl: 1 failed', cme.value.stderr)
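
The sequence above boils down to a download/drop/whereis round trip. A minimal
sketch of the same flow outside the test harness, assuming a reachable URL and
a scratch directory of your choosing (both hypothetical):

from datalad.api import Dataset

# hypothetical inputs -- substitute a real URL and a scratch path
example_url = 'https://example.com/data/file.txt'
ds = Dataset('/tmp/demo-ds').create()

# download the file; git-annex records the URL as a content source
ds.download_url(example_url, path='file.txt')

# output='full' returns a dict keyed by remote UUID, one entry per
# location known to hold the content (local repo, web remote, ...)
print(ds.repo.whereis('file.txt', output='full'))

# drop the local copy; the recorded URL keeps the content retrievable
ds.drop('file.txt')
ds.get('file.txt')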
Example #2
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put files into the dataset by URL (one is dropped again below)
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into the new one.
    # it must copy enough info to put datalad in the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
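
copy_file accepts a single source path, a source/destination pair, or several
sources followed by a target directory, as exercised above. A compact sketch,
assuming two already existing datasets at hypothetical paths:

from datalad.api import Dataset

src = Dataset('/tmp/src-ds')    # assumed to contain the files below
dest = Dataset('/tmp/dest-ds')

# single source: the file keeps its name in the destination dataset
dest.copy_file(src.pathobj / 'myfile1.txt')

# source/destination pair: copy under a new name
dest.copy_file([src.pathobj / 'myfile1.txt', dest.pathobj / 'renamed.txt'])

# several sources at once: the last element is the target directory
dest.copy_file([
    src.pathobj / 'myfile1.txt',
    src.pathobj / 'subdir' / 'myfile2.txt',
    dest.pathobj,
])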
Example #3
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none', 'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE), 'autoenable=true'
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')), path='myfile2.txt')
    # approximate test that the file is known to a remote
    # other than the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must be called with a dataset so the change gets saved; otherwise
    # the drop below would not work properly without reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # now replace the file in dest with different content at the same path;
    # again call with a dataset so the change gets saved and the drop works
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now getting the "same path" yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
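
Enabling the datalad special remote, as done at the top of this test, is a
one-time step per dataset. A sketch, assuming a fresh dataset at a
hypothetical path:

from datalad.api import Dataset
from datalad.consts import DATALAD_SPECIAL_REMOTE

ds = Dataset('/tmp/demo-ds').create()
# register the external special remote; autoenable lets clones activate it
ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
    'encryption=none',
    'type=external',
    'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
    'autoenable=true',
])
# confirm the registration
print(ds.repo.get_special_remotes())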
Example #4
def test_download_docker_blob(path=None):
    from datalad.consts import (
        DATALAD_SPECIAL_REMOTE,
        DATALAD_SPECIAL_REMOTES_UUIDS,
    )
    from datalad.customremotes.base import init_datalad_remote

    with patch_config({"datalad.repo.backend": "SHA256E"}):
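        # SHA256E keys embed the content's sha256 checksum in the key name,
        # so it can be compared against the Docker blob digest below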
        ds = Dataset(path).create()
    ds_repo = ds.repo
    init_datalad_remote(ds_repo, DATALAD_SPECIAL_REMOTE)

    id_ = "f0b02e9d092d905d0d87a8455a1ae3e9bb47b4aa3dc125125ca5cd10d6441c9f"
    outfile = ds_repo.pathobj / "blob"
    url = "https://registry-1.docker.io/v2/library/busybox/blobs/sha256:" + id_
    ds.download_url(urls=[url], path=str(outfile))

    annex_info = ds.repo.get_content_annexinfo(paths=[outfile], init=None)
    eq_(id_, annex_info[outfile]["keyname"])
    assert_in(DATALAD_SPECIAL_REMOTES_UUIDS[DATALAD_SPECIAL_REMOTE],
              ds_repo.whereis([str(outfile)])[0])
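
The final assertions use get_content_annexinfo and whereis to check the key
name and the known content locations. A sketch of the same inspection for a
single annexed file, assuming a dataset that already contains one (paths are
hypothetical):

from datalad.api import Dataset

ds = Dataset('/tmp/demo-ds')
path = ds.repo.pathobj / 'blob'

# per-file annex metadata; 'keyname' carries the checksum portion of the key
info = ds.repo.get_content_annexinfo(paths=[path], init=None)
print(info[path]['key'], info[path]['keyname'])

# UUIDs of all remotes that git-annex believes hold the content
print(ds.repo.whereis([str(path)])[0])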