def test_add_subdataset(path, other):
    """Saving a nested dataset path registers it as a subdataset of the parent."""
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    # FIX: is_installed is a method; the original asserted on the bound method
    # object itself, which is always truthy -- it must be called
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
def test_symlinked_relpath(path):
    """Save files via relative paths when the dataset root is reached via a symlink.

    Relative paths given to the plain ``save`` command must be resolved
    against the current working directory, not the dataset root.
    """
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")
    # Let's also do in subdirectory as CWD, check that relative path
    # given to a plain command (not dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")
        # '../later' is relative to CWD (the 'd' subdir), not the dataset root
        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)
    assert_repo_status(dspath)
def test_subsuperdataset_save(path):
    """Non-recursive save must not query subdatasets of a subdataset."""
    # Verify that when invoked without recursion save does not
    # cause querying of subdatasets of the subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so later to check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize sub3 (NOTE: original comment said "sub2", but it
    # is sub3 that is damaged here) so git would fail if any query is performed.
    rmtree(str(sub3.pathobj / '.git' / 'objects'))
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    # recursion reaches the broken sub3 and thus must fail
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should fail if we request saving while in the parent directory
    # but while not providing a dataset, since operation would run within
    # pointed subdataset
    with chpwd(sub1.path):
        assert_raises(CommandError, save, 'sub2')
    # but should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
def test_subdataset_save(path):
    """Saving a subdataset path alone must not save the parent dataset."""
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {"new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be commited!
    ok_clean_git(parent.path, untracked=['untracked'])
def test_symlinked_relpath(path):
    """Duplicate of the symlinked-dataset-root test (historical variant).

    Relative paths given to the plain ``save`` command must resolve against
    CWD even when the dataset root is a symlink.
    """
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(
        dspath,
        {
            "mike1": 'mike1',  # will be added from topdir
            "later": "later",  # later from within subdir
            "d": {
                "mike2": 'mike2',  # to be added within subdir
            }
        })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")
    # Let's also do in subdirectory as CWD, check that relative path
    # given to a plain command (not dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")
        # '../later' is relative to CWD (the 'd' subdir)
        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)
    assert_repo_status(dspath)
def test_gh2043p1(path):
    """Interim agreement on gh-2043: updated-only save vs. path-matching save."""
    # this tests documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.save('1')
    assert_repo_status(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    assert_repo_status(
        ds.path,
        # on windows we are in an unlocked branch by default, hence
        # we would see no change
        modified=[] if on_windows else ['1'],
        untracked=['2', '3'])
    # save(.) should recommit unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits
        save(path='.', updated=True)
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path)
def test_subsuperdataset_save(path):
    """Non-recursive save must not query subdatasets of a subdataset.

    Variant that breaks sub3 by making its git config unreadable instead of
    removing its object store.
    """
    # Verify that when invoked without recursion save does not
    # cause querying of subdatasets of the subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so later to check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize that sub3 so git would fail if any query is performed.
    (sub3.pathobj / '.git' / 'config').chmod(0o000)
    try:
        sub3.repo.call_git(['ls-files'], read_only=True)
        # git still works despite the chmod (e.g. running as root, or a
        # platform ignoring the permission bits) -- the test premise does not
        # hold, so skip; SkipTest is not a CommandError and thus propagates
        raise SkipTest
    except CommandError:
        # desired outcome
        pass
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
    # or in a subdataset above the problematic one
    with chpwd(sub1.path):
        save('sub2')
def test_relpath_add(path):
    """A relative path given to plain save() is resolved against CWD."""
    dataset = Dataset(path).create(force=True)
    workdir = op.join(path, 'dir')
    with chpwd(workdir):
        saved = save('testindir')
        expected = op.join(dataset.path, 'dir', 'testindir')
        eq_(saved[0]['path'], expected)
        # and now add all
        save('..')
    # auto-save enabled
    assert_repo_status(dataset.path)
def make_pretend_repo(dirname):
    """Create a throwaway datalad dataset in ``dirname`` with a small history.

    Produces three save operations: the initial data, the addition of a
    superfluous file, and its removal.

    NOTE(review): assumes ``dirname`` is a pathlib.Path (it is used with the
    ``/`` operator and ``.write_text``) -- confirm with callers.
    """
    os.chdir(dirname)
    datalad.create(str(dirname), force=True)
    # FIX: the return value was previously bound to an unused local (rev_log);
    # save() is called here purely for its side effect
    datalad.save("add data", str(dirname))
    (dirname / "useless.txt").write_text("who me.")
    datalad.save("add superfluous change", str(dirname))
    (dirname / "useless.txt").unlink()
    datalad.save("make things better", str(dirname))
def test_path_arg_call(path):
    """Relative file names must be resolved against a Dataset instance."""
    ds = create(path)
    for fname in ('abs.txt', 'rel.txt'):
        target = ds.pathobj / fname
        target.write_text(u'123')
        # we used to resolve relative paths against a dataset just given by
        # a path, but we no longer do that
        #save(dataset=ds.path, path=[testfile.name], to_git=True)
        save(dataset=ds, path=[target.name], to_git=True)
def test_bf2043p2(path):
    """`save --updated` must leave untracked content uncommitted (gh-2043)."""
    dataset = Dataset(path).create(force=True)
    dataset.repo.add('staged')
    assert_repo_status(dataset.path, added=['staged'], untracked=['untracked'])
    # save -u does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save(updated=True)
    assert_repo_status(dataset.path, untracked=['untracked'])
def test_bf2043p2(path):
    """Plain save must leave untracked content uncommitted (gh-2043, old API)."""
    dataset = Dataset(path).create(force=True)
    dataset.add('staged', save=False)
    ok_clean_git(dataset.path, head_modified=['staged'], untracked=['untracked'])
    # plain save does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save()
    ok_clean_git(dataset.path, untracked=['untracked'])
def test_path_arg_call(path):
    """Duplicate of test_path_arg_call: relative names need a Dataset instance."""
    ds = create(path)
    for testfile in (
            ds.pathobj / 'abs.txt',
            ds.pathobj / 'rel.txt'):
        testfile.write_text(u'123')
        # we used to resolve relative paths against a dataset just given by
        # a path, but we no longer do that
        #save(dataset=ds.path, path=[testfile.name], to_git=True)
        save(dataset=ds, path=[testfile.name], to_git=True)
def test_save_directory(path):
    """Sequence of save invocations on subdirectories, via method and command."""
    # Sequence of save invocations on subdirectories.
    ds = Dataset(path).create(force=True)
    ds.save(path='sdir1')
    ok_clean_git(ds.path, untracked=['sdir2/foo', 'sdir3/sdir/subsub/foo'])
    # There is also difference from
    # (NOTE(review): comment above is truncated in the original; presumably it
    # contrasted the dataset-method call with the plain-command call below --
    # confirm against project history)
    with chpwd(path):
        save(path='sdir2')
    ok_clean_git(ds.path, untracked=['sdir3/sdir/subsub/foo'])
    with chpwd(opj(path, 'sdir3')):
        save(path='sdir')
    ok_clean_git(ds.path)
def test_update_known_submodule(path):
    """Recursive save picks up an unregistered dataset both with and without a reference dataset."""
    def get_baseline(p):
        # a dataset with one nested-but-unregistered dataset inside
        ds = Dataset(p).create()
        # 'sub' binding unused; create() is called for its side effect
        sub = create(str(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)
    # attempt two, same as above but call add via reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
def test_subdataset_save(path):
    """Saving a subdataset path alone must not save the parent (new API)."""
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
def test_update_known_submodule(path):
    """Recursive save picks up an unregistered dataset both with and without a reference dataset.

    Historical variant of the same test; modernized to use builtin ``str``
    instead of the py2-compat ``text_type`` for consistency with the sibling
    version of this test in this file.
    """
    def get_baseline(p):
        # a dataset with one nested-but-unregistered dataset inside
        ds = Dataset(p).create()
        # 'sub' binding unused; create() is called for its side effect
        sub = create(str(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)
    # attempt two, same as above but call add via reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
def test_subdataset_save(path):
    """Duplicate of test_subdataset_save (new API, different literal layout)."""
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
def test_bf2541(path):
    """A root-level symlink pointing at a subdataset must not break recursive save."""
    ds = create(path)
    # register a subdataset (the binding was unused in the original)
    ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        # FIX: result was bound to an unused local (res); called for effect only
        save(recursive=True)
    assert_repo_status(ds.path)
def test_bf1886(path):
    """Symlinks pointing up/down the tree must not confuse save (gh-1886)."""
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the above but be in subdir of symlink
    # with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([op.join(parent.path, 'sub3'),
              op.join(parent.path, 'subdir', 'subsubdir', 'upup3')])
    assert_repo_status(parent.path)
def test_subdataset_save(path):
    """Duplicate of the old-API test_subdataset_save (different literal layout)."""
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be commited!
    ok_clean_git(parent.path, untracked=['untracked'])
def test_gh2043p1(path):
    """Old-API variant of the gh-2043 interim-agreement test."""
    # this tests documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.add('1')
    ok_clean_git(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    ok_clean_git(ds.path, index_modified=['1'], untracked=['2', '3'])
    # save(.) should recommit unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits by default
        save('.')  # because the first arg is the dataset
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    skip_v6_or_later(method='pass')(ok_clean_git)(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    skip_v6_or_later(method='pass')(ok_clean_git)(ds.path)
def save_output_to_repo(config_obj):
    """Save the current comparison data to the datalad repo and announce it.

    Commits everything under the base comparison directory with a dated
    message, then prints where the new sample output lives.
    """
    base_comparison_dir_path = get_base_comparison_dir_path(config_obj)
    update_msg = "Update data with test run on {d}".format(
        d=dt.datetime.today().strftime("%Y-%m-%d"))
    # FIX: the save() result was bound to an unused local (result);
    # on_failure="stop" makes datalad raise on error, so the call is
    # performed purely for its side effect
    datalad.save(update_msg, str(base_comparison_dir_path), on_failure="stop")
    # NOTE: sample_test_output is referenced via format(**locals()) below --
    # do not remove or rename it
    sample_test_output = get_test_data_path() / "sample_test_output"
    data_message = (
        "New sample output was saved to {sample_test_output} for "
        "future comparisons. Consider publishing this new data to "
        "the publicly accessible servers.. ")
    print(data_message.format(**locals()))
def test_recursive_save(path):
    """Recursive save across a three-level dataset hierarchy (old list-returning API)."""
    ds = Dataset(path).create()
    # nothing to save
    assert_false(ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.get_subdatasets(recursive=True, absolute=True, fulfilled=True),
        [subsubds.path, subds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_equal(ds.save(all_changes=True), [ds])
    # make the new file known to its dataset
    # with #1141 this would be
    #ds.add(newfile_name, save=False)
    subsubds.add(newfile_name, save=False)
    # but remains dirty because of the untracked file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_equal(ds.save(all_changes=True), [])
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_equal(ds.save(all_changes=True, recursive=True),
                 [subsubds, subds, ds])
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_false(ds.save())
    # no recursive
    assert_false(ds.save(all_changes=True))
    # an explicit target saves only the corresponding dataset
    assert_equal(save(files=[testfname]), [subsubds])
    # plain recursive without any files given will save the beast
    assert_equal(ds.save(recursive=True), [subds, ds])
    # there is nothing else to save
    assert_false(ds.save(all_changes=True, recursive=True))
    # one more time and check that all datasets in the hierarchy get updated
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_true(ds.save(all_changes=True, recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_not_equal(old, new)
    # now let's check saving "upwards"
    assert not subds.repo.dirty
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    subsubds.save(message="savingtestmessage", super_datasets=True,
                  all_changes=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])
    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
def test_save(path):
    """End-to-end save behavior: files, staged content, subdatasets, and tags."""
    ds = Dataset(path)
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")
    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)
    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")
    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # save works without ds and files given in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)
    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(
        res, 1,
        action='save', type='dataset', path=ds.path,
        message=('cannot tag this version: %s',
                 "fatal: tag 'new_sub' already exists"))
def test_recursive_save(path):
    """Duplicate of the old-API recursive-save test (different assert formatting)."""
    ds = Dataset(path).create()
    # nothing to save
    assert_false(ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.get_subdatasets(recursive=True, absolute=True, fulfilled=True),
        [subsubds.path, subds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_equal(ds.save(all_changes=True), [ds])
    # make the new file known to its dataset
    # with #1141 this would be
    #ds.add(newfile_name, save=False)
    subsubds.add(newfile_name, save=False)
    # but remains dirty because of the untracked file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_equal(ds.save(all_changes=True), [])
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_equal(ds.save(all_changes=True, recursive=True),
                 [subsubds, subds, ds])
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_false(ds.save())
    # no recursive
    assert_false(ds.save(all_changes=True))
    # an explicit target saves only the corresponding dataset
    assert_equal(save(files=[testfname]), [subsubds])
    # plain recursive without any files given will save the beast
    assert_equal(ds.save(recursive=True), [subds, ds])
    # there is nothing else to save
    assert_false(ds.save(all_changes=True, recursive=True))
    # one more time and check that all datasets in the hierarchy get updated
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_true(ds.save(all_changes=True, recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_not_equal(old, new)
    # now let's check saving "upwards"
    assert not subds.repo.dirty
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    subsubds.save(message="savingtestmessage", super_datasets=True,
                  all_changes=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])
    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(
        next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(subds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(ds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
def test_recursive_save(path):
    """Recursive save across a three-level hierarchy (result-record API)."""
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds.save(result_filter=is_ok_dataset),
        'path',
        [ds.path])
    # make the new file known to its dataset
    ds.add(newfile_name, save=False)
    # but remains dirty because of the uncommited file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])
    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all dataset, but there is
    # nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)
    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states,
                        [d.repo.get_hexsha() for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage", path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])
    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})
    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
def test_save(path):
    """Exercise ``save`` through the legacy API (``ok_clean_git`` era).

    Covers: saving staged files, re-saving modifications, PWD-based
    invocation without an explicit dataset, no-op saves, and saving
    subdataset state changes from the superdataset.

    NOTE(review): a later ``def test_save`` in this file reuses this name
    and will shadow this definition at import time — confirm intended.
    """
    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    # stage the file in git, then commit it via save
    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)
    ds.save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    # a plain modification (nothing staged) must also be picked up
    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommited .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
def add(
    template_id,
    osf_user,
    osf_password,
    osf_overwrite,
    gh_user,
    gh_token,
    path,
    nprocs,
):
    # Workflow: validate/complete metadata (license, RRID, name, authors,
    # references) for the template at `path`, then in a temp dir: clone/fork
    # <gh_user>/templateflow, create a datalad dataset `tpl-<template_id>`,
    # populate it, wire OSF + GitHub siblings, push, and open a pull request
    # against templateflow:tpl-intake carrying an intake .toml file.
    # NOTE(review): osf_user/osf_password/osf_overwrite/nprocs are accepted
    # but not referenced in this body — presumably consumed by the click
    # layer or planned; confirm.
    """Add a new template."""
    # deferred imports keep CLI startup light
    from .io import run_command
    from .utils import copy_template
    import shutil
    from datalad import api as dl

    # `hub` commands below authenticate via env; password comes from the
    # environment, while user/token are CLI-level secrets we require upfront
    gh_password = getenv("GITHUB_PASSWORD")
    if not gh_user or not gh_token:
        raise click.BadParameter("Insufficient secrets to login into GitHub")

    # default to ./tpl-<id> when no path is given
    path = Path(path or f"tpl-{template_id}").absolute()
    cwd = Path.cwd()
    if not path.exists():
        raise click.UsageError(f"<{path}> does not exist.")

    metadata = {}
    # Check metadata
    if (path / "template_description.json").exists():
        metadata = json.loads((path / "template_description.json").read_text())
    metadata["Identifier"] = template_id

    # Check license — accept any of the common file names
    license_path = path / "LICENSE"
    if not license_path.exists():
        license_path = path / "LICENCE"
    if not license_path.exists():
        license_path = path / "COPYING"
    if not license_path.exists():
        # no license file found: offer to ship a canned CC0/CC-BY one
        license_prompt = click.prompt(
            text="""\
A LICENSE file MUST be distributed with the template. The TemplateFlow Manager can \
set a license (either CC0 or CC-BY) for you.""",
            type=click.Choice(("CC0", "CC-BY", "Custom (abort)")),
            default="Custom (abort)",
        )
        if license_prompt == "Custom (abort)":
            raise click.UsageError(
                "Cannot proceed without a valid license. Please write a LICENSE "
                "file before uploading.")
        # use the license template bundled with the package
        license_path = Path(
            pkgr_fn("tfmanager", f"data/{license_prompt}.LICENSE"))
        metadata["License"] = license_prompt

    # Check RRID
    if not metadata.get("RRID"):
        rrid = click.prompt(
            text="Has a RRID (research resource ID) already been assigned?",
            type=str,
            default='') or None
        if rrid:
            metadata["RRID"] = rrid

    # Check short description
    if not metadata.get("Name", "").strip():
        short_desc = click.prompt(
            text="""\
The "Name" metadata is not found within the <template_description.json> file. \
Please provide a short description for this resource.""",
            type=str,
        )
        if not short_desc:
            raise click.UsageError(
                "Cannot proceed without a short description.")
        metadata["Name"] = short_desc

    # Check authors
    authors_prompt = [
        a.strip() for a in metadata.get("Authors", []) if a.strip()
    ]
    if not authors_prompt:
        authors_prompt = [
            n.strip()
            for n in click.prompt(
                text="""\
The "Authors" metadata is not found within the <template_description.json> file. \
Please provide a list of authors separated by semicolon (;) in <Lastname Initial(s)> format.""",
                type=str,
            ).split(";")
            if n
        ]
        if not authors_prompt:
            click.confirm("No authors were given, do you want to continue?",
                          abort=True)
    metadata["Authors"] = authors_prompt

    # Check references — normalize bare DOIs into https://doi.org/ URLs
    refs_prompt = [
        f"""\
{'https://doi.org/' if not a.strip().startswith('http') else ''}\
{a.replace("doi:", "").strip()}"""
        for a in metadata.get("ReferencesAndLinks", []) if a.strip()
    ]
    if not refs_prompt:
        refs_prompt = [
            n.replace('"', "").strip()
            for n in click.prompt(
                text="""\
The "ReferencesAndLinks" metadata is not found within the <template_description.json> file. \
Please provide a list of links and publications within double-quotes \
(for example, "doi:10.1101/2021.02.10.430678") and separated by spaces (< >).""",
                type=str,
            ).split(" ")
            if n
        ]
        if not refs_prompt:
            click.confirm("No authors were given, do you want to continue?",
                          abort=True)
    metadata["ReferencesAndLinks"] = refs_prompt

    with TemporaryDirectory() as tmpdir:
        repodir = Path(tmpdir) / "templateflow"
        # Clone root <user>/templateflow project - fork if necessary
        click.echo(f"Preparing Pull-Request (wd={tmpdir}).")
        clone = run_command(
            f"git clone https://github.com/{gh_user}/templateflow.git "
            "--branch tpl-intake --single-branch",
            cwd=tmpdir,
            capture_output=False,
        )
        if clone.returncode != 0:
            # user has no fork yet: clone upstream and fork it with `hub`
            run_command(
                "hub clone templateflow/templateflow",
                cwd=tmpdir,
                capture_output=False,
                env={
                    "GITHUB_USER": gh_user,
                    "GITHUB_PASSWORD": gh_password
                },
            )
            run_command(
                "hub fork --remote-name origin",
                cwd=str(repodir),
                capture_output=False,
                env={
                    "GITHUB_USER": gh_user,
                    "GITHUB_PASSWORD": gh_password
                },
            )
        else:
            run_command(
                "git remote add upstream https://github.com/templateflow/templateflow.git",
                cwd=str(repodir),
                capture_output=False,
            )
        chdir(repodir)
        # Create datalad dataset
        dl.create(
            path=f"tpl-{template_id}",
            cfg_proc="text2git",
            initopts={"initial-branch": "main"},
            description=metadata["Name"],
        )
        # Populate template
        copy_template(
            path=path,
            dest=repodir / f"tpl-{template_id}",
        )
        # Copy license
        shutil.copy(license_path, repodir / f"tpl-{template_id}" / "LICENSE")
        # (Over)write template_description.json
        (repodir / f"tpl-{template_id}" / "template_description.json").write_text(
            json.dumps(metadata, indent=2))
        # Init/update CHANGELOG — newest entry first, prior content appended
        changelog = repodir / f"tpl-{template_id}" / "CHANGES"
        changes = [
            f"""
## {datetime.date.today().ctime()} - TemplateFlow Manager Upload
Populated contents after NIfTI sanitizing by the TF Manager.
"""
        ]
        if changelog.exists():
            changes += [changelog.read_text()]
        changelog.write_text("\n".join(changes))
        # Init OSF sibling
        rrid_str = f" (RRID: {metadata['RRID']})" if metadata.get(
            "RRID") else ""
        dl.create_sibling_osf(
            title=f"TemplateFlow resource: <{template_id}>{rrid_str}",
            name="osf",
            dataset=f"./tpl-{template_id}",
            public=True,
            category="data",
            description=metadata["Name"],
            tags=["TemplateFlow dataset", template_id])
        # Init GH sibling; publishing to GitHub depends on osf-storage
        dl.create_sibling_github(reponame=f"tpl-{template_id}",
                                 dataset=str(repodir / f"tpl-{template_id}"),
                                 github_login=gh_user,
                                 publish_depends="osf-storage",
                                 existing="replace",
                                 access_protocol="ssh")
        # Save added contents
        dl.save(dataset=str(repodir / f"tpl-{template_id}"),
                message="ADD: TemplateFlow Manager initialized contents")
        # Push to siblings
        dl.push(
            dataset=str(repodir / f"tpl-{template_id}"),
            to="github",
            jobs=cpu_count(),
        )
        # Back home
        chdir(cwd)
        # Build the intake PR branch on top of upstream/tpl-intake
        run_command(
            "git fetch upstream tpl-intake",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git checkout -b pr/tpl-{template_id} upstream/tpl-intake",
            cwd=str(repodir),
            capture_output=False,
        )
        # The intake file records which GitHub user hosts the new dataset
        (repodir / f"{path.name}.toml").write_text(
            toml.dumps({
                "github": {
                    "user": gh_user
                },
            }))
        run_command(
            f"git add {path.name}.toml",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git commit -m 'add(tpl-{template_id}): create intake file'",
            cwd=str(repodir),
            capture_output=False,
        )
        run_command(
            f"git push -u origin pr/tpl-{template_id}",
            cwd=str(repodir),
            capture_output=False,
            env={
                "GITHUB_USER": gh_user,
                "GITHUB_TOKEN": gh_token
            },
        )
        # Compose the PR body from the (now complete) metadata
        (repodir.parent / "message.md").write_text(f"""\
ADD: ``tpl-{template_id}``

## {metadata.get('Name', '<missing Name>')}

Identifier: {metadata.get('Identifier', '<missing Identifier>')}
Datalad: https://github.com/{gh_user}/tpl-{template_id}

### Authors
{', '.join(metadata['Authors'])}.

### License
{metadata.get('License', metadata.get('Licence', '<missing License>'))}

### Cohorts
{' '.join(('The dataset contains', str(len(metadata.get('cohort', []))), 'cohorts.')) if metadata.get('cohort') else 'The dataset does not contain cohorts.'}

### References and links
{', '.join(metadata.get('ReferencesAndLinks', [])) or 'N/A'}
""")
        run_command(
            "hub pull-request -b templateflow:tpl-intake "
            f"-h {gh_user}:pr/tpl-{template_id} "
            f"-F {repodir.parent / 'message.md'}",
            cwd=str(repodir),
            capture_output=False,
            env={
                "GITHUB_USER": gh_user,
                "GITHUB_TOKEN": gh_token
            },
        )
def test_save(path):
    """Exercise ``save`` through the current API (``assert_repo_status`` era).

    Covers staged/modified files, PWD-based invocation, no-op saves,
    subdataset state saving, and ``version_tag`` handling including the
    error path on re-tagging.
    """
    ds = Dataset(path)
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")
    # stage the file in git, then commit it via save
    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)
    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")
    # a plain modification (nothing staged) must also be picked up
    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # save works without ds and files given in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)
    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(
        res, 1,
        action='save', type='dataset', path=ds.path,
        message=('cannot tag this version: %s',
                 "fatal: tag 'new_sub' already exists"))
def test_path_arg_call(path):
    """Verify that ``save`` accepts a path given relative to the dataset."""
    ds = create(path)
    # write each file, then save it by its bare name (relative path)
    for fname in ('abs.txt', 'rel.txt'):
        target = ds.pathobj / fname
        target.write_text(u'123')
        save(dataset=ds.path, path=[target.name], to_git=True)
def _makeds(path, levels, ds=None, max_leading_dirs=2):
    """Create a hierarchy of datasets

    Used recursively, with current invocation generating datasets for the
    first level, and delegating sub-levels to recursive invocation

    Parameters
    ----------
    path : str
      Path to the top directory under which dataset will be created.
      If relative -- relative to current directory
    levels : list of list
      List of specifications for :func:`random.randint` call per each level.
    ds : Dataset, optional
      Super-dataset which would contain a new dataset (thus its path would
      be a parent of path. Note that ds needs to be installed.
    max_leading_dirs : int, optional
      Up to how many leading directories within a dataset could lead to a
      sub-dataset

    Yields
    ------
    str
       Path to the generated dataset(s)
    """
    # we apparently can't import api functionality within api
    from datalad.api import save
    # To simplify managing all the file paths etc
    if not isabs(path):
        path = abspath(path)
    # make it a git (or annex??) repository... ok - let's do randomly one or another ;)
    RepoClass = GitRepo if random.randint(0, 1) else AnnexRepo
    lgr.info("Generating repo of class %s under %s", RepoClass, path)
    repo = RepoClass(path, create=True)
    # let's create some dummy file and add it to the beast
    fn = opj(path, "file%d.dat" % random.randint(1, 1000))
    with open(fn, 'w') as f:
        f.write(fn)
    repo.add(fn, git=True)
    repo.commit(msg="Added %s" % fn)

    yield path

    if levels:
        # make a dataset for that one since we want to add sub datasets
        ds_ = Dataset(path)
        # Process the levels
        level, levels_ = levels[0], levels[1:]
        nrepos = random.randint(*level)  # how many subds to generate
        for irepo in range(nrepos):
            # generate up to max_leading_dirs leading dirs.
            # BUGFIX: randint is inclusive of both endpoints, so the former
            # randint(0, max_leading_dirs + 1) could produce
            # max_leading_dirs + 1 leading directories, exceeding the
            # documented maximum.
            subds_path = opj(*(['d%i' % i
                                for i in range(
                                    random.randint(0, max_leading_dirs))]
                               + ['r%i' % irepo]))
            subds_fpath = opj(path, subds_path)
            # yield all under
            for d in _makeds(subds_fpath, levels_, ds=ds_):
                yield d

    if ds:
        # register this freshly made dataset within its superdataset
        assert ds.is_installed()
        save(
            path,
            dataset=ds,
        )
def process_files(files):
    """Process input lines from `files`, mirroring docker hub images.

    Each input line names either a single repository or a namespace (as
    decided by ``parse_input``); namespaces are expanded to their
    repositories. For every tag/architecture matching the module-level
    ``target_tags``/``target_architectures`` patterns, the image manifest
    and record are written under ``images/`` and registered as a datalad
    container.

    Returns
    -------
    list
      Names for which listing or processing failed (HTTP errors).
    """
    failed = []
    for line in fileinput.input(files):
        name, kind = parse_input(line)
        if kind == "namespace":
            try:
                repos = list(get_namespace_repos(name))
            except requests.HTTPError as exc:
                lgr.warning(
                    "Failed to list repositories for %s (status %s). Skipping",
                    name, exc.response.status_code)
                failed.append(name)
                continue
        else:
            repos = [name]

        target_architectures_re = re.compile(target_architectures)
        target_tags_re = re.compile(target_tags)
        for repo in repos:
            lgr.info("Working on %s", repo)
            try:
                registry = RepoRegistry(repo)
                #pprint(list(zip(sorted(_all_tags['latest'], key=lambda r: r['digest']), sorted(_all_tags['1.32.0'],
                #                                                                               key=lambda r: r['digest']))))
                tag_images = dict(get_repo_tag_images(repo))
                # 'latest' tag is special in docker, it is the default one
                # which might typically point to some other release/version.
                # If we find that it is the case, we do not create a dedicated "latest"
                # image/datalad container -- we just add container entry pointing to that
                # one. If there is no matching one -- we do get "latest"
                latest_matching_tag = None
                # NOTE: "master" is also often used to signal a moving target
                # it might, or not, correspond to tagged release. I guess we are just
                # doomed to breed those
                if target_tags_re.match('latest'):
                    matching_tags = []
                    for tag, images in tag_images.items():
                        if tag == 'latest' or not target_tags_re.match(tag):
                            # BUGFIX: the %(tag)s mapping placeholder was
                            # passed without any argument, so logging never
                            # interpolated it and emitted the literal
                            # "%(tag)s"; supply the mapping explicitly.
                            lgr.debug("Skipping tag %(tag)s", dict(tag=tag))
                            continue
                        if images == tag_images['latest']:
                            matching_tags.append(tag)
                    if len(matching_tags) >= 1:
                        if len(matching_tags) > 1:
                            lgr.info(
                                "Multiple tags images match latest, taking the first: %s",
                                ', '.join(matching_tags))
                        latest_matching_tag = matching_tags[0]
                        lgr.info("Taking %s as the one for 'latest'",
                                 latest_matching_tag)
                else:
                    # TODO: if there is no latest, we should at least establish the
                    # convenient one for each tag
                    pass
                for tag, images in tag_images.items():
                    if tag == 'latest' and latest_matching_tag:
                        continue  # skip since we will handle it
                    if not target_tags_re.match(tag):
                        # BUGFIX: same missing-mapping issue as above
                        lgr.debug("Skipping tag %(tag)s", dict(tag=tag))
                        continue
                    # more than one architecture published under this tag?
                    multiarch = len({i['architecture'] for i in images}) > 1
                    for image in images:
                        architecture = image['architecture']
                        if not target_architectures_re.match(architecture):
                            lgr.debug("Skipping architecture %(architecture)s", image)
                            continue
                        manifest = registry.get_manifest(image['digest'])
                        digest = manifest["config"]["digest"]
                        # yoh: if I got it right, it is actual image ID we see in docker images
                        assert digest.startswith("sha256:")
                        digest = digest[7:]
                        digest_short = digest[:12]  # use short version in name
                        last_pushed = image.get('last_pushed')
                        if last_pushed:
                            assert last_pushed.endswith('Z')
                            # take only date
                            last_pushed = last_pushed[:10].replace('-', '')
                            assert len(last_pushed) == 8
                        cleaner_repo = repo
                        # this is how it looks on hub.docker.com URL
                        if repo.startswith('library/'):
                            cleaner_repo = "_/" + cleaner_repo[len('library/'):]
                        image_name = f"{cleaner_repo}/{tag}/"
                        if multiarch:
                            image_name += f"{architecture}-"
                        if last_pushed:
                            # apparently not in all, e.g. no for repronim/neurodocker
                            # may be None for those built on the hub?
                            image_name += f"{last_pushed}-"
                        image_name += f"{digest_short}"
                        dl_container_name = clean_container_name(str(image_name))
                        image_path = Path("images") / image_name
                        url = f"dhub://{repo}:{tag}@{image['digest']}"
                        save_paths = []
                        if image_path.exists():
                            lgr.info("%s already exists, skipping adding",
                                     str(image_path))
                        else:
                            save_paths.append(
                                write_json(
                                    Path(str(image_path) + '.manifest.json'),
                                    manifest))
                            save_paths.append(
                                write_json(
                                    Path(str(image_path) + '.image.json'),
                                    image))
                            add_container(url, dl_container_name, image_path)
                            # TODO: either fix datalad-container for https://github.com/datalad/datalad-container/issues/98
                            # or here, since we have manifest, we can datalad download-url, and add-archive-content
                            # of the gzipped layers (but without untarring) - that should add datalad-archive
                            # urls to individual layers in the "saved" version
                            # TODO: make it in a single commit with add_container at least,
                            # or one commit for the whole repo sweep
                            save(
                                path=save_paths,
                                message=
                                f"Added manifest and image records for {dl_container_name}"
                            )
                        # TODO: ensure .datalad/config to have additional useful fields:
                        # architecture, os, and manually "updateurl" since not added for
                        # dhub:// ATM
                        if tag == latest_matching_tag and architecture == default_architecture:
                            # TODO remove section if exists, copy this one
                            lgr.warning(
                                "Tracking of 'latest' is not yet implemented")
            except requests.HTTPError as exc:
                lgr.warning(
                    "Failed processing %s. Skipping\n status %s for %s",
                    repo, exc.response.status_code, exc.response.url)
                failed.append(name)
                continue
    return failed