Example #1
0
def test_copy_file_errors(dspath1, dspath2, nondspath):
    """Check that invalid ``copy_file`` invocations are rejected.

    Malformed calls are expected to raise ``ValueError``. Well-formed calls
    that cannot be carried out are run with ``on_failure='ignore'`` and are
    expected to yield ``error`` or ``impossible`` results.

    Parameters
    ----------
    dspath1
        Location of the reference dataset; it is created part-way through
        the test.
    dspath2
        Location of a second dataset that is created independently of the
        reference dataset.
    nondspath
        Location that this test never turns into a dataset.
    """
    ds1 = Dataset(dspath1)
    # nothing given
    assert_raises(ValueError, copy_file)
    # no target directory given
    assert_raises(ValueError, ds1.copy_file, 'somefile')
    # using multiple sources and --specs-from
    assert_raises(ValueError, ds1.copy_file, ['1', '2', '3'], specs_from='-')
    # trying to copy to a dir that is not in a dataset
    ds1.create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=nondspath, on_failure='ignore'))
    # copy into a dataset that is not in the reference dataset;
    # only the side effect of creating it matters, so no name is bound
    Dataset(dspath2).create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=dspath2, on_failure='ignore'))

    # attempt to copy from a directory, but no recursion is enabled.
    # use no reference ds to exercise a different code path
    assert_status('impossible',
                  copy_file([nondspath, dspath1], on_failure='ignore'))

    # attempt to copy a file that doesn't exist
    assert_status('impossible',
                  copy_file(['funky', dspath1], on_failure='ignore'))

    # attempt to copy a file without a destination given
    assert_raises(ValueError, copy_file, 'somepath')
    assert_status('impossible',
                  copy_file(specs_from=['somepath'], on_failure='ignore'))
Example #2
0
def test_copy_file(workdir=None, webdir=None, weburl=None):
    """Exercise ``copy_file`` across datasets, plain files and a git repo.

    Two URL-backed files are registered in a source dataset and then copied
    into a fresh destination dataset in several ways: single file, explicit
    source/destination pair, and several sources into a directory. A file
    from outside any dataset and a copy into a dataset created with
    ``annex=False`` are exercised at the end.

    Parameters
    ----------
    workdir
        Directory under which the ``src``, ``dest`` and ``git`` datasets are
        created.
    webdir
        Local directory holding ``webfile1`` (presumably the directory that
        is served under `weburl` -- confirm against the fixture).
    weburl
        Base URL from which ``webfile1`` and ``webfile2`` are downloaded.
    """
    work_root = Path(workdir)
    web_root = Path(webdir)

    # source dataset with two files that are registered by URL
    source = Dataset(work_root / 'src').create()
    source.download_url(
        '/'.join((weburl, 'webfile1')),
        path='myfile1.txt')
    source.download_url(
        '/'.join((weburl, 'webfile2')),
        path=opj('subdir', 'myfile2.txt'))
    first_src = source.pathobj / 'myfile1.txt'
    ok_file_has_content(first_src, '123')

    # a pristine dataset to receive the copies
    target = Dataset(work_root / 'dest').create()

    # Unless the target dataset sits on a crippled FS (where `annex fromkey`
    # doesn't work until after 8.20210428), the file content can even be
    # dropped from the source repo before copying.
    source_content_droppable = (
        target.repo._check_version_kludges("fromkey-supports-unlocked")
        or not target.repo.is_managed_branch()
    )
    if source_content_droppable:
        source.drop('myfile1.txt', reckless='kill')
        nok_(source.repo.file_has_content('myfile1.txt'))

    # The copy has to carry over enough information for datalad to be able
    # to fetch the content from the original URL afterwards.
    target.copy_file(first_src)
    target.get('myfile1.txt')
    ok_file_has_content(target.pathobj / 'myfile1.txt', '123')

    # Leave a stray file in the tmp folder used for copying, to make sure
    # such leftovers do not get in the way.
    stray = target.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    stray.parent.mkdir(parents=True)
    stray.touch()

    # same source again, this time as a (source, destination) pair that
    # gives the copy a new file name
    renamed = target.pathobj / 'renamed.txt'
    target.copy_file([first_src, renamed])
    ok_file_has_content(renamed, '123')

    # several sources in one call, with the dataset root as destination
    second_src = source.pathobj / 'subdir' / 'myfile2.txt'
    target.copy_file([first_src, second_src, target.pathobj])

    # source that does not live in any dataset
    target.copy_file(web_root / 'webfile1')

    # annexed file copied into a dataset that was created without an annex
    plain_git = Dataset(work_root / 'git').create(annex=False)
    plain_git.copy_file(source.pathobj / 'subdir' / 'myfile2.txt')
Example #3
0
def test_copy_file_datalad_specialremote(
        workdir=None, webdir=None, weburl=None):
    """Check that ``copy_file`` carries the datalad special remote along.

    The source dataset gets the datalad special remote initialized before
    two files are downloaded by URL. After copying a file into a fresh
    dataset, that dataset is expected to list a matching special remote and
    to be able to re-obtain dropped content through it. Finally the same
    destination path is overwritten with the other file, and the re-obtained
    content is expected to change accordingly.

    Parameters
    ----------
    workdir
        Directory under which the ``src`` and ``dest`` datasets are created.
    webdir
        Not used by this test body.
    weburl
        Base URL from which ``webfile1`` and ``webfile2`` are downloaded.
    """
    base = Path(workdir)
    origin = Dataset(base / 'src').create()

    # set up the datalad special remote in the source dataset
    remote_options = [
        'encryption=none',
        'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
        'autoenable=true',
    ]
    origin.repo.init_remote(DATALAD_SPECIAL_REMOTE, remote_options)

    # register both files by URL
    for remote_name, local_name in (('webfile1', 'myfile1.txt'),
                                    ('webfile2', 'myfile2.txt')):
        origin.download_url('/'.join((weburl, remote_name)), path=local_name)

    # rough check that a remote other than the web remote knows the file
    whereis_records = origin.repo.whereis(
        'myfile1.txt', output='full').values()
    assert_in_results(
        whereis_records,
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )

    # a fresh dataset starts out without any special remote
    recipient = Dataset(base / 'dest').create()
    eq_(recipient.repo.get_special_remotes(), {})

    def _refetch_and_check(expected):
        # drop the local content, get it back via the 'datalad' remote and
        # compare it with the expected content
        recipient.drop('myfile1.txt')
        recipient.repo.get('myfile1.txt', remote='datalad')
        ok_file_has_content(recipient.pathobj / 'myfile1.txt', expected)

    # The call has to go through a dataset so that the change gets saved;
    # otherwise the drop below would need reckless mode to work.
    recipient.copy_file([origin.pathobj / 'myfile1.txt', recipient.pathobj])
    # the destination dataset now has a special remote of its own
    assert_in_results(
        recipient.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # ... and it is functional
    _refetch_and_check('123')

    # Put different content at the very same destination path (again via a
    # dataset, so that the change is saved before dropping).
    recipient.copy_file(
        [origin.pathobj / 'myfile2.txt', recipient.pathobj / 'myfile1.txt'])
    # getting the "same path" now yields the other file's content
    _refetch_and_check('abc')
Example #4
0
def test_copy_file_into_dshierarchy(srcdir, destdir):
    """Copy a directory tree into a dataset that contains a subdataset.

    The destination dataset has a subdataset at ``lvl2``. After a recursive
    copy of the source's ``lvl1`` directory, both datasets are expected to
    report the same relative paths, apart from the destination's own
    subdataset bookkeeping entries.

    Parameters
    ----------
    srcdir
        Directory that becomes the source dataset; it is created with
        ``force=True``, so it presumably already holds content (confirm
        against the fixture).
    destdir
        Directory in which the destination dataset is created.
    """
    origin = Dataset(Path(srcdir)).create(force=True)
    origin.save()

    # destination superdataset with a subdataset that will receive `lvl2`
    superds = Dataset(destdir).create()
    superds.create(superds.pathobj / 'lvl2')
    assert_repo_status(superds.path)

    superds.copy_file(
        [origin.pathobj / 'lvl1', superds.pathobj],
        recursive=True)
    assert_repo_status(superds.path)

    # entries that only exist because the destination has a subdataset
    bookkeeping = ('lvl2', '.gitmodules')

    def _content_paths(ds):
        # sorted relative paths reported by status, minus bookkeeping entries
        reported = ds.status(result_xfm='relpaths', result_renderer=None)
        return sorted(p for p in reported if p not in bookkeeping)

    # the input structure is reproduced, merely spread over nested datasets
    eq_(_content_paths(origin), _content_paths(superds))
Example #5
0
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    """Check that ``copy_file`` refuses to place content named ``.git``.

    Parameters
    ----------
    srcpath
        Directory in which the source dataset and its ``sub`` subdataset are
        created.
    destpath
        Directory in which the destination dataset is created.
    """
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest now.
    # The destination must be one path ending in '.git'; a comma in place
    # of the final `/` would turn this into a three-element spec instead of
    # a (source, destination) pair.
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')
Example #6
0
def test_copy_file_nourl(serv_path=None, orig_path=None, tst_path=None):
    """Copying a file without any known content location is 'impossible'.

    A dataset with two files is created and the content of one of them is
    dropped with ``reckless='kill'``. The dataset is then cloned into a
    superdataset, and copying the content-less file from that clone is
    expected to yield an ``impossible`` result with a matching message.

    Parameters
    ----------
    serv_path
        Directory for the dataset that originally holds the files.
    orig_path
        Directory for the superdataset that clones `serv_path` as ``serv``.
    tst_path
        Not used by this test body.
    """
    # source dataset: one file keeps its content, the other one loses it
    server = Dataset(serv_path).create()
    for fname, payload in (('myfile.dat', 'I am content'),
                           ('noavail.dat', 'null')):
        (server.pathobj / fname).write_text(payload)
    server.save()
    server.drop('noavail.dat', reckless='kill')

    # empty superdataset that gets the source dataset as a subdataset
    superds = Dataset(orig_path).create()
    superds.clone(source=serv_path, path='serv')
    assert_repo_status(superds.path)

    # try to copy the content-less file into the superdataset
    missing = superds.pathobj / 'serv' / 'noavail.dat'
    results = superds.copy_file(missing, on_failure='ignore')
    assert_in_results(
        results,
        status='impossible',
        message='no known location of file content',
        path=str(missing),
    )
Example #7
0
def _check_copy_file_specs_from(srcdir, destdir, specs, **kwargs):
    """Create a dataset at `destdir` and run ``copy_file`` with `specs`.

    Parameters
    ----------
    srcdir
        Accepted but not used by this helper.
    destdir
        Directory in which the destination dataset is created.
    specs
        Value passed on as ``specs_from``.
    **kwargs
        Passed through to ``copy_file`` unchanged.

    Returns
    -------
    tuple
        The created dataset and whatever ``copy_file`` returned.
    """
    dest_ds = Dataset(destdir).create()
    return dest_ds, dest_ds.copy_file(specs_from=specs, **kwargs)
Example #8
0
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    """Check the guards against copying content to a ``.git`` destination.

    Covers recursion skipping ``.git/``, an explicit ``.git`` source, a
    destination named ``.git`` (both as a not-yet-existing path and as an
    existing target directory), legitimate copies of a file taken from
    inside ``.git/``, and one known case that the checks do not catch.

    Parameters
    ----------
    srcpath
        Directory in which the source dataset and its ``sub`` subdataset are
        created.
    destpath
        Directory in which the destination dataset is created.
    """
    superds = Dataset(srcpath).create()
    nested = superds.create('sub')
    recipient = Dataset(destpath).create()

    nested_gitdir = nested.pathobj / '.git'
    git_config = nested_gitdir / 'config'

    # a recursive copy leaves .git/ out
    recipient.copy_file(nested.path, recursive=True)
    nok_((recipient.pathobj / 'sub' / '.git').exists())

    # asking for .git/ explicitly fails
    refused = recipient.copy_file(
        nested_gitdir, recursive=True, on_failure='ignore')
    assert_status('impossible', refused)

    # an acceptable source name does not help if the destination is '.git'
    bad_dest_results = recipient.copy_file(
        [git_config, recipient.pathobj / 'some' / '.git'],
        on_failure='ignore')
    assert_in_results(
        bad_dest_results, status='impossible', action='copy_file')

    # The destination above was not an existing directory and therefore was
    # not treated as a target directory. The target-directory code path has
    # its own '.git' guard, which is handled differently.
    with assert_raises(ValueError):
        recipient.copy_file([git_config, recipient.pathobj / '.git'])

    # A source below .git/ is fine as long as the destination lies outside
    # of .git/.
    copied_config = recipient.pathobj / "config"
    nok_(copied_config.exists())
    recipient.copy_file(git_config)
    ok_(copied_config.exists())

    renamed = recipient.pathobj / 'some'
    nok_(renamed.exists())
    recipient.copy_file([git_config, renamed])
    ok_(renamed.exists())

    # There is a limit to the cycles spent on preventing foot shooting. The
    # following slips through, because for each destination coming out of
    # _yield_specs() only .name is checked, not all upstream parts.
    sneaky = recipient.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    recipient.copy_file([git_config, sneaky])
    ok_(sneaky.exists())