Example #1
0
def test_publish_aggregated(path):
    """Verify that aggregated metadata objects survive publishing.

    Creates an origin dataset with a subdataset, forces metadata objects into
    the annex, aggregates metadata recursively, publishes to a local SSH
    sibling, and checks that both datasets hold the object files and report
    identical metadata for the subdataset.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # sorted() already returns a list; wrapping it in list() was redundant
    objs = sorted(base.repo.find(objpath))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
Example #2
0
def test_push_wanted(srcpath, dstpath):
    """Check that a sibling's annex-wanted setting is honored by push.

    With 'datalad.push.copy-auto-if-wanted' enabled locally, push must skip
    content the target does not want; a dataset-level configuration must not
    be able to override the local one; removing the local setting enables
    pushing the restricted content again.
    """
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    for fname, content in (('data.0', '0'), ('secure.1', '1'), ('secure.2', '2')):
        (src.pathobj / fname).write_text(content)
    src.save()

    # drop one file to mimic content that is simply absent locally and hence
    # cannot be pushed
    src.drop('secure.2', check=False)

    # tag the sensitive files; the concrete value "verysecure" is irrelevant
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        name='target',
        annex_wanted="not metadata=distribution-restrictions=*",
    )
    # wanted must be obeyed once the local configuration asks for it
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(src.pathobj / 'data.0'))
    assert_not_in_results(res, path=str(src.pathobj / 'secure.1'))
    assert_not_in_results(res, path=str(src.pathobj / 'secure.2'))
    assert_status('notneeded', src.push(to='target'))

    # a dataset-level configuration must not be able to overrule this
    src.config.set('datalad.push.copy-auto-if-wanted',
                   'false',
                   where='dataset')
    assert_status('notneeded', src.push(to='target'))

    # inspect the target directly to be sure
    dst = Dataset(dstpath)
    # the ordinary file made it over
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # the restricted one did not
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # without the local config the secure file becomes pushable again
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #3
0
def test_auto_data_transfer(path):
    """Exercise push with data="auto".

    With annex.numcopies=1 nothing is copied automatically; raising it to 2
    triggers copying; --since restricts which files --auto considers; and the
    remote's preferred-content ("wanted") setting is taken into account.
    """
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    (ds_a.pathobj / "foo.dat").write_text("foo")
    ds_a.save()

    # make sure the default of a single required copy is in effect
    ds_a.repo.config.set("annex.numcopies", "1", where="local")
    ds_a.create_sibling(str(path / "b"), name="b")

    foo = str(ds_a.pathobj / "foo.dat")

    # a single required copy means data="auto" transfers nothing ...
    assert_not_in_results(
        ds_a.push(to="b", data="auto", since=None), action="copy")

    # ... not even for an explicitly named file
    assert_not_in_results(
        ds_a.push(to="b", path="foo.dat", data="auto", since=None),
        action="copy")

    # requiring two copies makes the transfer happen
    ds_a.repo.config.set("annex.numcopies", "2", where="local")
    assert_in_results(
        ds_a.push(to="b", data="auto", since=None),
        action="copy", target="b", status="ok", path=foo)

    # --since= limits the files considered by --auto
    (ds_a.pathobj / "bar.dat").write_text("bar")
    ds_a.save()
    (ds_a.pathobj / "baz.dat").write_text("baz")
    ds_a.save()
    bar = str(ds_a.pathobj / "bar.dat")
    res = ds_a.push(to="b", data="auto", since="HEAD~1")
    assert_not_in_results(res, action="copy", path=bar)
    assert_in_results(res,
                      action="copy",
                      target="b",
                      status="ok",
                      path=str(ds_a.pathobj / "baz.dat"))

    # the remote's preferred content is honored by --auto as well
    ds_a.repo.config.unset("annex.numcopies", where="local")
    ds_a.repo.set_preferred_content("wanted", "nothing", remote="b")
    assert_not_in_results(
        ds_a.push(to="b", data="auto", since=None),
        action="copy", path=bar)

    ds_a.repo.set_preferred_content("wanted", "anything", remote="b")
    assert_in_results(
        ds_a.push(to="b", data="auto", since=None),
        action="copy", target="b", status="ok", path=bar)
Example #4
0
def test_publish_target_url(src, desttop, desturl):
    """Regression test for target_url handling in create_sibling.

    See https://github.com/datalad/datalad/issues/1762
    """
    ds = Dataset(src).create(force=True)
    ds.save('1')
    sshurl = 'ssh://datalad-test:%s/subdir' % desttop
    ds.create_sibling(sshurl,
                      target_url=desturl + 'subdir/.git',
                      name='target')
    # pushing must yield results, and the file must arrive at the target
    assert ds.push(to='target')
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
Example #5
0
def test_publish_target_url(src, desttop, desturl):
    """Regression test for target_url handling in create_sibling.

    See https://github.com/datalad/datalad/issues/1762
    """
    ds = Dataset(src).create(force=True)
    if ds.repo.is_managed_branch():
        # this scenario is known-broken on managed branches
        raise SkipTest(
            'Skipped due to https://github.com/datalad/datalad/issues/4075')
    ds.save('1')
    sshurl = 'ssh://localhost:%s/subdir' % desttop
    ds.create_sibling(sshurl,
                      target_url=desturl + 'subdir/.git',
                      name='target')
    # pushing must yield results, and the file must arrive at the target
    assert ds.push(to='target')
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
Example #6
0
def test_push_wanted(srcpath, dstpath):
    """Verify push honors the wanted expression configured on the sibling.

    Restricted content must be skipped as long as the sibling's annex-wanted
    setting excludes it, and become pushable once that setting is cleared.
    """
    src = Dataset(srcpath).create()
    for fname, content in (('data.0', '0'), ('secure.1', '1'), ('secure.2', '2')):
        (src.pathobj / fname).write_text(content)
    src.save()

    # drop one file to mimic content that is simply absent locally and hence
    # cannot be pushed
    src.drop('secure.2', check=False)

    # tag the sensitive files; the concrete value "verysecure" is irrelevant
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        name='target',
        annex_wanted="not metadata=distribution-restrictions=*",
    )
    # the wanted expression configured on the sibling must be obeyed
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(src.pathobj / 'data.0'))
    assert_not_in_results(res, path=str(src.pathobj / 'secure.1'))
    assert_not_in_results(res, path=str(src.pathobj / 'secure.2'))
    assert_status('notneeded', src.push(to='target'))

    # inspect the target directly to be sure
    dst = Dataset(dstpath)
    # the ordinary file made it over
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # the restricted one did not
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # clearing the wanted expression enables pushing the secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #7
0
def test_publish_aggregated(path):
    """Verify that aggregated metadata objects survive publishing.

    Creates an origin dataset with a subdataset, forces metadata objects into
    the annex, aggregates metadata recursively, publishes to a local SSH
    sibling, and checks that both datasets hold the object files and report
    identical metadata for the subdataset.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    base.create('sub', force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)

    # create sibling and publish to it
    # Note: Use realpath() below because we know that the resolved temporary
    # test directory exists in the target (many tests rely on that), but it
    # doesn't necessarily have the unresolved variant.
    spath = op.realpath(opj(path, 'remote'))
    base.create_sibling(name="local_target",
                        sshurl="ssh://datalad-test",
                        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # sorted() already returns a list; wrapping it in list() was redundant
    objs = sorted(base.repo.find(objpath))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{
            k: v
            for k, v in i.items() if k not in ('path', 'refds', 'parentds')
        } for i in base.metadata('sub')],
        [{
            k: v
            for k, v in i.items() if k not in ('path', 'refds', 'parentds')
        } for i in remote.metadata('sub')],
    )
Example #8
0
def test_publish_aggregated(path):
    """Verify that aggregated metadata objects survive publishing.

    Publishes a dataset (with its aggregated metadata forced into the annex)
    to a local SSH sibling and checks that both ends hold the metadata object
    files and report identical metadata for the subdataset.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # ensure all metadata objects end up in the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # set up a sibling and publish everything to it
    spath = opj(path, 'remote')
    base.create_sibling(name="local_target",
                        sshurl="ssh://localhost",
                        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # find() yields a newline-separated listing; drop empty entries
    found = base.repo.find(objpath).split('\n')
    objs = sorted(entry for entry in found if entry)
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)

    def _strip_local(records):
        # drop location-specific keys so records compare across datasets
        return [
            {k: v for k, v in rec.items()
             if k not in ('path', 'refds', 'parentds')}
            for rec in records
        ]

    # and we can squeeze the same metadata out of both ends
    eq_(_strip_local(base.metadata('sub')),
        _strip_local(remote.metadata('sub')))
Example #9
0
def test_auto_if_wanted_data_transfer_path_restriction(path):
    """data="auto-if-wanted" respects path restrictions and wanted settings.

    A path argument confines the wanted-triggered transfer to a subdataset or
    to a single file, and content excluded by the remote's wanted expression
    is only transferred with data="anything".
    """
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a_sub0 = ds_a.create("sub0")
    ds_a_sub1 = ds_a.create("sub1")

    datasets = (ds_a, ds_a_sub0, ds_a_sub1)
    for ds in datasets:
        (ds.pathobj / "sec.dat").write_text("sec")
        (ds.pathobj / "reg.dat").write_text("reg")
    ds_a.save(recursive=True)

    ds_a.create_sibling(
        str(path / "b"),
        name="b",
        annex_wanted="not metadata=distribution-restrictions=*",
        recursive=True)
    for ds in datasets:
        ds.repo.set_metadata(add={"distribution-restrictions": "doesntmatter"},
                             files=["sec.dat"])

    def assert_copied(res, fpath):
        # a successful copy of fpath to "b" must be among the results
        assert_in_results(res,
                          action="copy",
                          target="b",
                          status="ok",
                          path=str(fpath))

    def assert_not_copied(res, fpath):
        # no successful copy of fpath to "b" may be among the results
        assert_not_in_results(res,
                              action="copy",
                              target="b",
                              status="ok",
                              path=str(fpath))

    # wanted-triggered --auto can be restricted to a subdataset ...
    res = ds_a.push(to="b", path="sub0", data="auto-if-wanted", recursive=True)
    assert_not_copied(res, ds_a.pathobj / "reg.dat")
    assert_copied(res, ds_a_sub0.pathobj / "reg.dat")
    assert_not_copied(res, ds_a_sub0.pathobj / "sec.dat")
    assert_not_copied(res, ds_a_sub1.pathobj / "reg.dat")

    # ... and to a wanted file
    res = ds_a.push(to="b",
                    path="reg.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_copied(res, ds_a.pathobj / "reg.dat")
    assert_not_copied(res, ds_a_sub1.pathobj / "reg.dat")

    # naming a file the remote does not want still does not transfer it ...
    res = ds_a.push(to="b",
                    path="sec.dat",
                    data="auto-if-wanted",
                    recursive=True)
    assert_not_copied(res, ds_a.pathobj / "sec.dat")

    # ... unless data="anything" overrides the wanted setting
    res = ds_a.push(to="b", path="sec.dat", data="anything", recursive=True)
    assert_copied(res, ds_a.pathobj / "sec.dat")