def test_publish_aggregated(path):
    """Aggregated metadata objects survive publication to a local sibling.

    Creates an 'origin' dataset with one subdataset, forces all metadata
    objects into the annex, aggregates metadata, publishes everything to an
    SSH sibling, and verifies that both copies hold the object content and
    yield identical metadata for the subdataset.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\n'
            'metadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # sorted() already returns a new list; wrapping it in list() was redundant
    objs = sorted(base.repo.find(objpath))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
def test_push_wanted(srcpath, dstpath):
    """Push honors the sibling's "wanted" expression only when the
    datalad.push.copy-auto-if-wanted configuration instructs it to,
    and dataset-level config cannot force-enable that behavior."""
    source = Dataset(srcpath).create()
    if source.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    # one ordinary file and two "sensitive" ones
    for fname, content in (('data.0', '0'),
                           ('secure.1', '1'),
                           ('secure.2', '2')):
        (source.pathobj / fname).write_text(content)
    source.save()
    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    source.drop('secure.2', check=False)
    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    source.repo.set_metadata(
        add={'distribution-restrictions': 'verysecure'},
        files=['secure.1', 'secure.2'])
    source.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    source.config.set(
        'datalad.push.copy-auto-if-wanted', 'true', where='local')
    pushed = source.push(to='target')
    assert_in_results(
        pushed, action='copy', path=str(source.pathobj / 'data.0'),
        status='ok')
    for restricted in ('secure.1', 'secure.2'):
        assert_not_in_results(pushed, path=str(source.pathobj / restricted))
    assert_status('notneeded', source.push(to='target'))
    # check that dataset-config cannot overrule this
    source.config.set(
        'datalad.push.copy-auto-if-wanted', 'false', where='dataset')
    pushed = source.push(to='target')
    assert_status('notneeded', pushed)
    # check the target to really make sure
    target_ds = Dataset(dstpath)
    # normal file, yes
    eq_((target_ds.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if target_ds.repo.is_managed_branch():
        neq_((target_ds.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(
            FileNotFoundError, (target_ds.pathobj / 'secure.1').read_text)
    # remove local config, must enable push of secure file
    source.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    pushed = source.push(to='target')
    assert_in_results(pushed, path=str(source.pathobj / 'secure.1'))
    eq_((target_ds.pathobj / 'secure.1').read_text(), '1')
def test_auto_data_transfer(path):
    """With data="auto", push copies content only when numcopies or the
    remote's preferred content requires it, and --since restricts which
    files are even considered."""
    root = Path(path)
    ds_a = Dataset(root / "a").create()
    (ds_a.pathobj / "foo.dat").write_text("foo")
    ds_a.save()
    # Should be the default, but just in case.
    ds_a.repo.config.set("annex.numcopies", "1", where="local")
    ds_a.create_sibling(str(root / "b"), name="b")

    # With numcopies=1, no data is copied with data="auto".
    outcome = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(outcome, action="copy")

    # Even when a file is explicitly given.
    outcome = ds_a.push(to="b", path="foo.dat", data="auto", since=None)
    assert_not_in_results(outcome, action="copy")

    # numcopies=2 changes that.
    ds_a.repo.config.set("annex.numcopies", "2", where="local")
    outcome = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "foo.dat"))

    # --since= limits the files considered by --auto.
    (ds_a.pathobj / "bar.dat").write_text("bar")
    ds_a.save()
    (ds_a.pathobj / "baz.dat").write_text("baz")
    ds_a.save()
    outcome = ds_a.push(to="b", data="auto", since="HEAD~1")
    assert_not_in_results(
        outcome, action="copy", path=str(ds_a.pathobj / "bar.dat"))
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "baz.dat"))

    # --auto also considers preferred content.
    ds_a.repo.config.unset("annex.numcopies", where="local")
    ds_a.repo.set_preferred_content("wanted", "nothing", remote="b")
    outcome = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(
        outcome, action="copy", path=str(ds_a.pathobj / "bar.dat"))

    ds_a.repo.set_preferred_content("wanted", "anything", remote="b")
    outcome = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "bar.dat"))
def test_publish_target_url(src, desttop, desturl):
    """Pushing via create-sibling with an explicit target_url works.

    Regression test for https://github.com/datalad/datalad/issues/1762
    """
    dataset = Dataset(src).create(force=True)
    dataset.save('1')
    dataset.create_sibling(
        'ssh://datalad-test:%s/subdir' % desttop,
        name='target',
        target_url=desturl + 'subdir/.git')
    outcome = dataset.push(to='target')
    assert outcome
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
def test_publish_target_url(src, desttop, desturl):
    """Pushing via create-sibling with an explicit target_url works.

    Regression test for https://github.com/datalad/datalad/issues/1762
    NOTE(review): this redefines a test of the same name earlier in the
    file and shadows it at import time — confirm whether both variants
    are intended to exist.
    """
    dataset = Dataset(src).create(force=True)
    if dataset.repo.is_managed_branch():
        raise SkipTest(
            'Skipped due to https://github.com/datalad/datalad/issues/4075')
    dataset.save('1')
    dataset.create_sibling(
        'ssh://localhost:%s/subdir' % desttop,
        name='target',
        target_url=desturl + 'subdir/.git')
    outcome = dataset.push(to='target')
    assert outcome
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
def test_push_wanted(srcpath, dstpath):
    """Push obeys the sibling's "wanted" expression set at create-sibling
    time, and clearing that expression re-enables pushing restricted files."""
    source = Dataset(srcpath).create()
    # one ordinary file and two "sensitive" ones
    for fname, content in (('data.0', '0'),
                           ('secure.1', '1'),
                           ('secure.2', '2')):
        (source.pathobj / fname).write_text(content)
    source.save()
    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    source.drop('secure.2', check=False)
    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    source.repo.set_metadata(
        add={'distribution-restrictions': 'verysecure'},
        files=['secure.1', 'secure.2'])
    source.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    pushed = source.push(to='target')
    assert_in_results(
        pushed, action='copy', path=str(source.pathobj / 'data.0'),
        status='ok')
    for restricted in ('secure.1', 'secure.2'):
        assert_not_in_results(pushed, path=str(source.pathobj / restricted))
    assert_status('notneeded', source.push(to='target'))
    # check the target to really make sure
    target_ds = Dataset(dstpath)
    # normal file, yes
    eq_((target_ds.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if target_ds.repo.is_managed_branch():
        neq_((target_ds.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(
            FileNotFoundError, (target_ds.pathobj / 'secure.1').read_text)
    # reset wanted config, which must enable push of secure file
    source.repo.set_preferred_content('wanted', '', remote='target')
    pushed = source.push(to='target')
    assert_in_results(pushed, path=str(source.pathobj / 'secure.1'))
    eq_((target_ds.pathobj / 'secure.1').read_text(), '1')
def test_publish_aggregated(path):
    """Aggregated metadata objects survive publication to a local sibling.

    Creates an 'origin' dataset with one subdataset, forces all metadata
    objects into the annex, aggregates metadata, publishes everything to an
    SSH sibling, and verifies that both copies hold the object content and
    yield identical metadata for the subdataset.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\n'
            'metadata/objects/** annex.largefiles=anything\n'
        )
    base.create('sub', force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    # create sibling and publish to it
    # Note: Use realpath() below because we know that the resolved temporary
    # test directory exists in the target (many tests rely on that), but it
    # doesn't necessarily have the unresolved variant.
    spath = op.realpath(opj(path, 'remote'))
    base.create_sibling(
        name="local_target",
        sshurl="ssh://datalad-test",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # sorted() already returns a new list; wrapping it in list() was redundant
    objs = sorted(base.repo.find(objpath))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
def test_publish_aggregated(path):
    """Aggregated metadata objects survive publication to a local sibling.

    Variant where repo.find() returns a newline-separated string that is
    split into individual object paths.
    """
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\n'
            'metadata/objects/** annex.largefiles=anything\n'
        )
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    # drop empty entries produced by splitting, then sort for a stable order
    objs = sorted(filter(None, base.repo.find(objpath).split('\n')))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items()
          if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
def test_auto_if_wanted_data_transfer_path_restriction(path):
    """data="auto-if-wanted" transfers can be restricted by path, and an
    explicit path does not override the remote's "wanted" rejection."""
    root = Path(path)
    top = Dataset(root / "a").create()
    sub0 = top.create("sub0")
    sub1 = top.create("sub1")
    # each dataset gets one restricted ("sec") and one regular ("reg") file
    for d in (top, sub0, sub1):
        (d.pathobj / "sec.dat").write_text("sec")
        (d.pathobj / "reg.dat").write_text("reg")
    top.save(recursive=True)
    top.create_sibling(
        str(root / "b"),
        name="b",
        annex_wanted="not metadata=distribution-restrictions=*",
        recursive=True)
    for d in (top, sub0, sub1):
        d.repo.set_metadata(
            add={"distribution-restrictions": "doesntmatter"},
            files=["sec.dat"])

    # wanted-triggered --auto can be restricted to subdataset...
    outcome = top.push(
        to="b", path="sub0", data="auto-if-wanted", recursive=True)
    assert_not_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(top.pathobj / "reg.dat"))
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(sub0.pathobj / "reg.dat"))
    assert_not_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(sub0.pathobj / "sec.dat"))
    assert_not_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(sub1.pathobj / "reg.dat"))

    # ... and to a wanted file.
    outcome = top.push(
        to="b", path="reg.dat", data="auto-if-wanted", recursive=True)
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(top.pathobj / "reg.dat"))
    assert_not_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(sub1.pathobj / "reg.dat"))

    # But asking to transfer a file does not do it if the remote has a
    # wanted setting and doesn't want it.
    outcome = top.push(
        to="b", path="sec.dat", data="auto-if-wanted", recursive=True)
    assert_not_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(top.pathobj / "sec.dat"))

    outcome = top.push(
        to="b", path="sec.dat", data="anything", recursive=True)
    assert_in_results(
        outcome, action="copy", target="b", status="ok",
        path=str(top.pathobj / "sec.dat"))