def test_add_readme(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    assert_status('ok', ds.add_readme())
    # should use default name
    eq_(
        open(opj(path, 'README.md')).read(),
        """\
# Dataset "demo_ds"

this is for play

### Authors

- Betty
- Tom

### License

PDDL

## General information

This is a DataLad dataset (id: {id}).

For more information on DataLad and on how to work with its datasets,
see the DataLad documentation at: http://docs.datalad.org
""".format(
            id=ds.id))

    # should skip on re-run
    assert_status('notneeded', ds.add_readme())

def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible',
                  publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')

    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')

def test_here(path):
    # few smoke tests regarding the 'here' sibling
    ds = create(path)
    res = ds.siblings(
        'query',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_(ds.repo.uuid, here['annex-uuid'])
    assert_in('annex-description', here)
    assert_in('annex-bare', here)
    assert_in('available_local_disk_space', here)

    # set a description
    res = ds.siblings(
        'configure',
        name='here',
        description='very special',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_('very special', here['annex-description'])

def test_get_recurse_dirs(o_path, c_path):
    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')
    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)

def test_install_list(path, top_path):
    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)

def test_drop_nocrash_absent_subds(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    parent.uninstall('sub')
    ok_clean_git(parent.path)
    with chpwd(path):
        assert_status('notneeded', drop('.', recursive=True))

def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)

def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)

def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets',
        return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the extractor
    # considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])

def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s', res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)

def test_failed_clone(dspath):
    ds = create(dspath)
    res = ds.clone("http://nonexistingreallyanything.datalad.org/bla", "sub",
                   on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s', res)

def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))

def test_uninstall_invalid(path):
    ds = Dataset(path).create(force=True)
    for method in (uninstall, remove, drop):
        assert_raises(InsufficientArgumentsError, method)
        # refuse to touch stuff outside the dataset
        assert_status('error',
                      method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error',
                      method(dataset=ds, path='../madeupnonexist',
                             on_failure='ignore'))

def test_safetynet(path):
    ds = Dataset(path).create()
    os.makedirs(opj(ds.path, 'deep', 'down'))
    for p in (ds.path, opj(ds.path, 'deep'), opj(ds.path, 'deep', 'down')):
        with chpwd(p):
            # will never remove PWD, or anything outside the dataset
            for target in (ds.path, os.curdir, os.pardir,
                           opj(os.pardir, os.pardir)):
                assert_status(
                    ('error', 'impossible'),
                    uninstall(path=target, on_failure='ignore'))

def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    # we publish just git
    published3 = ds.publish(to='localhost', transfer_data='none')
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))

    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)

def test_get_single_file(path):
    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    result = ds.get("test-annex.dat")
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], opj(ds.path, 'test-annex.dat'))
    eq_(result[0]['annexkey'], ds.repo.get_file_key('test-annex.dat'))
    ok_(ds.repo.file_has_content('test-annex.dat') is True)

def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets',
                     return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))

def test_update_strategy(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'impossible',
            ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'ok',
            ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))

def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )

def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using current
    # user's group
    ds.create_sibling(target_url, name=remote, shared='group',
                      group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we have not yet used an option to record publishing to that
    # sibling by default (e.g. --set-upstream), running just ds.publish
    # should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    # should be ok, non recursive; BUT it (git or us?) would
    # create an empty sub/ directory
    ds.publish(to=remote)
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1,
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))

def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))

def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))

def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(
        res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(
        res, 1, action='diff', state='added',
        path=opj(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path,
                {'onefile': 'tobeadded', 'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path, type='dataset')
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=opj(sub.path, 'twofile'), type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=opj(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(
        res, 1, action='diff', state='added',
        path=opj(ds.path, 'onefile'), type='file')

    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='added',
        path=opj(sub.path, 'twofile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path, type='dataset')

def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme.exception))

def test_remove_subds(path):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1,
        path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1,
        path=op.join(ds.path, 'sub'),
        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)

def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

def test_multiway_merge(path):
    # prepare ds with two siblings, but no tracking branch
    ds = Dataset(op.join(path, 'ds_orig')).create()
    r1 = AnnexRepo(path=op.join(path, 'ds_r1'), git_opts={'bare': True})
    r2 = GitRepo(path=op.join(path, 'ds_r2'), git_opts={'bare': True})
    ds.siblings(action='add', name='r1', url=r1.path)
    ds.siblings(action='add', name='r2', url=r2.path)
    assert_status('ok', ds.publish(to='r1'))
    assert_status('ok', ds.publish(to='r2'))
    # just a fetch should be no issue
    assert_status('ok', ds.update())
    # ATM we do not support multi-way merges
    assert_status('impossible',
                  ds.update(merge=True, on_failure='ignore'))

def test_failon_nodrop(path):
    # test to make sure that we do not wipe out data when checks are enabled
    # despite the general error behavior mode
    ds = Dataset(path).create()
    # we play with a subdataset to bypass the tests that prevent the removal
    # of top-level datasets
    sub = ds.create('sub')
    create_tree(sub.path, {'test': 'content'})
    ds.save(opj('sub', 'test'))
    ok_clean_git(ds.path)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # we put one file into the dataset's annex, no redundant copies
    # neither uninstall nor remove should work
    res = ds.uninstall('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # same with remove
    res = ds.remove('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))

def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        assert_status('error', [result[0]])
        assert_in_results(result, status='ok', path=sub2.path)

        cml.assert_logged(
            msg="target path already exists and not empty".format(sub1.path),
            regex=False, level='ERROR')

def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path):
        # runs nothing, does nothing
        assert_result_count(ds.run(), 0)
        eq_(last_state, ds.repo.get_hexsha())
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
            # let's not speculate that the exit code is always 127
            ok_(cme.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.repo.head.commit.message
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        assert_status('notneeded', res)
        eq_(last_state, ds.repo.get_hexsha())

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath):
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_status('ok', res)
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty2'), type='file')

def test_rerun_just_one_commit(path):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status('impossible',
                      ds.rerun(branch="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    ds.repo.checkout("orph", options=["--orphan"])
    ds.repo.call_git(["reset", "--hard"])
    ds.repo.config.reload()

    ds.run('echo static-content > static')
    eq_(len(ds.repo.get_revisions("HEAD")), 1)
    assert_raises(IncompleteResultsError, ds.rerun)
    assert_raises(IncompleteResultsError, ds.rerun, since="", onto="")

    # --script propagates the error.
    with swallow_outputs():
        assert_raises(IncompleteResultsError,
                      ds.rerun, since="", onto="", script="-")
    # --dry-run propagates the error.
    assert_raises(IncompleteResultsError,
                  ds.rerun,
                  since="", onto="", report=True,
                  return_type="list")

def test_update_unborn_master(path):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available. Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH,
                         symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible",
                  ds_b.update(merge=True, on_failure="ignore"))
    ds_b.repo.checkout("other")
    assert_status("ok",
                  ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())

def test_uninstall_invalid(path):
    ds = Dataset(path).create(force=True)
    # no longer a uniform API for uninstall, drop, and remove
    for method in (uninstall,):  # remove, drop):
        with chpwd(ds.path):
            assert_status('error', method(on_failure='ignore'))
        # refuse to touch stuff outside the dataset
        assert_status('error',
                      method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error',
                      method(dataset=ds, path='../madeupnonexist',
                             on_failure='ignore'))

def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
            # let's not speculate that the exit code is always 127
            ok_(cme.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.repo.head.commit.message
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        assert_status('notneeded', res)
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_status('notneeded', res)

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_status('ok', res)
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty2'), type='file')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)

def test_backup_archive(path, objtree, archivremote):
    """Similar to test_archive_layout(), but not focused on
    compatibility with the directory-type special remote. Instead, it tests
    building a second RIA remote from an existing one, e.g. for backup
    purposes.
    """
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')

    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    targetpath.mkdir(parents=True)
    subprocess.run(
        ['7z', 'u', str(targetpath / 'archive.7z'), '.'],
        cwd=str(Path(objtree) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'),
    )
    initexternalremote(ds.repo, '7z', 'ria', config={'base-path': archivremote})
    # wipe out the initial RIA remote (just for testing if the upcoming
    # one can fully take over)
    shutil.rmtree(objtree)
    # fsck to make git-annex aware of the loss
    assert_status('error', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='archive', fast=True)])
    # now only available "here"
    eq_(len(ds.repo.whereis('one.txt')), 1)

    # make the backup archive known
    initexternalremote(ds.repo, 'backup', 'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='backup', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        res = add(path="some", on_failure='ignore')
        assert_status('impossible', res)
    ds = Dataset(opj(path, 'ds'))
    ds.create()
    # non-existing path outside
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))
    # existing path outside
    with open(opj(path, 'outside'), 'w') as f:
        f.write('doesnt matter')
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))

def test_reaggregate(path):
    ds, subds1, subds2 = _prep_partial_update_ds(path)
    # the actual job
    assert_status('ok', ds.meta_aggregate(recursive=True))
    # nothing without a modification
    assert_status('notneeded', ds.meta_aggregate(recursive=True))

    # modify subds1
    (subds1.pathobj / 'new').write_text(text_type('content'))
    ds.save(recursive=True)

    # go for a full re-aggregation, it should do the right thing
    # and only re-extract from subds1 and the root dataset
    # as these are the only ones with changes
    res = ds.meta_aggregate(recursive=True)
    # we should see three deletions, two for the replaced metadata blobs
    # of the modified subdataset, and one for the dataset metadata of the super
    assert_result_count(res, 3, action='delete')
    # four additions: two new blobs for the subdataset, one dataset
    # metadata blob for the root, due to a new modification date,
    # and the aggregate catalog
    assert_result_count(res, 4, action='add')
    # partial reaggregation has tidied up everything nicely, so a
    # full aggregation does nothing
    good_state = ds.repo.get_hexsha()
    assert_status('notneeded', ds.meta_aggregate(recursive=True))
    eq_(good_state, ds.repo.get_hexsha())
    # a constraining path will also not trigger any further action
    assert_status('notneeded', ds.meta_aggregate(path='down', recursive=True))
    eq_(good_state, ds.repo.get_hexsha())
    # but we can force extraction and get a selective update for this one
    # dataset only
    # not pointing to a subdataset itself, but do recursion from a subdirectory
    # downwards
    # but without an actual dataset change, and no change to an extractor's
    # output nothing will change in the dataset
    ds.meta_aggregate(path='down', recursive=True, force='extraction')
    eq_(good_state, ds.repo.get_hexsha())

def test_get_invalid_call(path, file_outside):
    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible', get(None, dataset=path, on_failure='ignore'))

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove indicator file that create has placed
    # in the dataset to make it possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh: but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(
        res, status='impossible',
        message='path not associated with any dataset')

def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any
    # untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])

def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = opj(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    ok_clean_git(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    ok_clean_git(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = opj(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    ok_clean_git(ds.path)

    # Make a non-run commit.
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    report[-1]["commit"] == ds.repo.get_hexsha()

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(opj(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.add("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    ok_clean_git(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)

def test_rerun_empty_branch(path):
    GitRepo(path, create=True)
    ds = Dataset(path)
    assert_status("impossible", ds.rerun(on_failure="ignore"))

def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
        assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage', where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] ==
                 ensure_unicode(name)
                 for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])

def _test_version_check(host, dspath, store):
    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'
    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())
    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary
    # choice. We need just something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError, ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')

def test_push_recursive(origin_path, src_path, dst_top, dst_sub,
                        dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out recursively
    # running on a clone should make the test scenario more different than
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get(
        '.', recursive=True, get_data=False, result_xfm='datasets')

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(
        res, path=top.path, type='dataset',
        refspec='refs/heads/master:refs/heads/master',
        operations=['new-branch'], action='publish', status='ok',
        target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='error', type='dataset', path=d.path,
            message=("Unknown target sibling '%s'.", 'target'))

    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(
        subnoannex, 'target', dst_subnoannex, annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(
        res, path=top.path, type='dataset', action='publish',
        status='notneeded', target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # all corresponding branches match across all datasets
    for s, d in zip((top, sub, subnoannex, subsub),
                    (target_top, target_sub, target_subnoannex,
                     target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(
        res, status='ok', refspec="refs/heads/master:refs/heads/master")
    assert_in_results(
        res, status='notneeded', refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is an still '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(
        ('ok', 'notneeded'),
        top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(
        res, action='copy', status='ok', path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(
        res, action='copy', status='ok',
        path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(
        res, status='ok', type='dataset', path=sub.path,
        refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(
            res, status='notneeded', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2,
            status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.add(opj('2', 'load.dat'),
               message="saving changes within subm2",
               recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')

def test_update_simple(origin, src_path, dst_path): # prepare src source = install(src_path, source=origin, recursive=True) # forget we cloned it (provide no 'origin' anymore), which should lead to # setting tracking branch to target: source.repo.remove_remote("origin") # dataset without sibling will not need updates assert_status('notneeded', source.update()) # deprecation message doesn't ruin things assert_status('notneeded', source.update(fetch_all=True)) # but error if unknown sibling is given assert_status('impossible', source.update(sibling='funky', on_failure='ignore')) # get a clone to update later on: dest = install(dst_path, source=src_path, recursive=True) # test setup done; # assert all fine assert_repo_status(dst_path) assert_repo_status(src_path) # update yields nothing => up-to-date assert_status('ok', dest.update()) assert_repo_status(dst_path) # modify origin: with open(opj(src_path, "update.txt"), "w") as f: f.write("Additional content") source.save(path="update.txt", message="Added update.txt") assert_repo_status(src_path) # update without `merge` only fetches: assert_status('ok', dest.update()) # modification is not known to active branch: assert_not_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch())) # modification is known to branch origin/<default branch> assert_in("update.txt", dest.repo.get_files("origin/" + DEFAULT_BRANCH)) # merge: assert_status('ok', dest.update(merge=True)) # modification is now known to active branch: assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch())) # it's known to annex, but has no content yet: dest.repo.get_file_key("update.txt") # raises if unknown eq_([False], dest.repo.file_has_content(["update.txt"])) # check subdataset path constraints, baseline (parent + 2 subds) assert_result_count(dest.update(recursive=True), 3, status='ok', type='dataset') # no recursion and invalid path still updates the parent res = dest.update(path='whatever') assert_result_count(res, 1, status='ok', type='dataset') assert_result_count(res, 1, status='ok', path=dest.path) # invalid path with recursion also does res = dest.update(recursive=True, path='whatever') assert_result_count(res, 1, status='ok', type='dataset') assert_result_count(res, 1, status='ok', path=dest.path) # valid path and no recursion only updates the parent res = dest.update(path='subm 1') assert_result_count(res, 1, status='ok', type='dataset') assert_result_count(res, 1, status='ok', path=dest.path) # valid path and recursion updates matching res = dest.update(recursive=True, path='subm 1') assert_result_count(res, 2, status='ok', type='dataset') assert_result_count(res, 1, status='ok', path=dest.path) assert_result_count(res, 1, status='ok', path=str(dest.pathobj / 'subm 1')) # additional invalid path doesn't hurt res = dest.update(recursive=True, path=['subm 1', 'mike']) assert_result_count(res, 2, status='ok', type='dataset') # full match res = dest.update(recursive=True, path=['subm 1', '2']) assert_result_count(res, 3, status='ok', type='dataset') # test that update doesn't crash if we specify only a single path (submod) to # operate on with chpwd(dest.path): # in 0.11.x it would be a single result since "pwd" dataset is not # considered, and would be relative path (as specified). 
# In 0.12.0 - it would include implicit pwd dataset, and paths would be absolute res_update = update(path=['subm 1'], recursive=True) assert_result_count(res_update, 2) for p in dest.path, str(dest.pathobj / 'subm 1'): assert_in_results(res_update, path=p, action='update', status='ok', type='dataset') # and with merge we would also try to save (but there would be no changes) res_merge = update(path=['subm 1'], recursive=True, merge=True) assert_result_count(res_merge, 2, action='update') # 2 of "updates" really. assert_in_results(res_merge, action='update', status='ok', type='dataset') assert_in_results(res_merge, action='save', status='notneeded', type='dataset') # smoke-test if recursive update doesn't fail if submodule is removed # and that we can run it from within a dataset without providing it # explicitly assert_result_count(dest.remove('subm 1'), 1, status='ok', action='remove', path=opj(dest.path, 'subm 1')) with chpwd(dest.path): assert_result_count(update(recursive=True), 2, status='ok', type='dataset') assert_result_count(dest.update(merge=True, recursive=True), 2, action='update', status='ok', type='dataset') # and now test recursive update with merging in differences create_tree(opj(source.path, '2'), {'load.dat': 'heavy'}) source.save(opj('2', 'load.dat'), message="saving changes within subm2", recursive=True) assert_result_count(dest.update(merge=True, recursive=True), 2, action='update', status='ok', type='dataset') # and now we can get new file dest.get('2/load.dat') ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
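# --- Illustrative sketch (not part of the test suite) ---
# Hedged sketch of the path-constrained updates checked above: with recursion,
# a path argument selects which subdatasets are updated, while the superdataset
# itself is always processed. The superdataset path and subdataset name below
# are hypothetical and assumed to exist already.
from datalad.api import Dataset

superds = Dataset('/path/to/superds')        # hypothetical, with a 'sub' subdataset
superds.update(recursive=True)               # superdataset plus every subdataset
superds.update(path='sub', recursive=True)   # superdataset plus the matching subdataset only
superds.update(path='sub')                   # without recursion only the superdataset is updated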
def test_unlock(path): ds = Dataset(path) # file is currently locked: # TODO: use get_annexed_files instead of hardcoded filename assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w") # in direct mode there is no unlock: if ds.repo.is_direct_mode(): res = ds.unlock() assert_result_count(res, 1) assert_status('notneeded', res) # in V6 we can unlock even if the file's content isn't present: elif ds.repo.config.getint("annex", "version") == 6: res = ds.unlock() assert_result_count(res, 1) assert_status('ok', res) # TODO: RF: make 'lock' a command as well # re-lock to further on have a consistent situation with V5: ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock']) else: # cannot unlock without content (annex get wasn't called) assert_raises(CommandError, ds.unlock) # FIXME ds.repo.get('test-annex.dat') result = ds.unlock() assert_result_count(result, 1) if ds.repo.is_direct_mode(): assert_status('notneeded', result) else: assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok') with open(opj(path, 'test-annex.dat'), "w") as f: f.write("change content") ds.repo.add('test-annex.dat') # in V6 we need to explicitly re-lock it: if ds.repo.config.getint("annex", "version") == 6: # TODO: RF: make 'lock' a command as well # re-lock to further on have a consistent situation with V5: ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock']) ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again") if not ds.repo.is_direct_mode(): # after commit, file is locked again: assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w") # content was changed: with open(opj(path, 'test-annex.dat'), "r") as f: eq_("change content", f.read()) # unlock again, this time more specific: result = ds.unlock(path='test-annex.dat') assert_result_count(result, 1) if ds.repo.is_direct_mode(): assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='notneeded') else: assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok') with open(opj(path, 'test-annex.dat'), "w") as f: f.write("change content again") ds.repo.add('test-annex.dat') # in V6 we need to explicitly re-lock it: if ds.repo.config.getint("annex", "version") == 6: # TODO: RF: make 'lock' a command as well # re-lock to further on have a consistent situation with V5: ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock']) ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again") # TODO: # BOOOM: test-annex.dat writeable in V6! # Why the hell is this different than the first time we wrote to the file # and locked it again? # Also: After opening the file is empty. if not ds.repo.is_direct_mode(): # after commit, file is locked again: assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w") # content was changed: with open(opj(path, 'test-annex.dat'), "r") as f: eq_("change content again", f.read())
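# --- Illustrative sketch (not part of the test suite) ---
# A small sketch of the unlock/edit/save cycle from the test above, assuming
# datalad and git-annex are installed; the file name is made up. On current
# datalad the re-lock happens on save, so the manual `git annex lock` dance
# from the V5/V6 era is not shown.
import tempfile
from os.path import join as opj
from datalad.api import Dataset

ds = Dataset(opj(tempfile.mkdtemp(), 'ds')).create()
(ds.pathobj / 'blob.dat').write_text('original')
ds.save(message='add annexed file')   # the file ends up locked by git-annex
ds.unlock('blob.dat')                 # make it writable in place
(ds.pathobj / 'blob.dat').write_text('changed')
ds.save(message='edit blob.dat')      # saving annexes/locks it again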
def test_diff(path, norepo): with chpwd(norepo): assert_status('impossible', diff(on_failure='ignore')) ds = Dataset(path).create() ok_clean_git(ds.path) # reports stupid revision input assert_result_count(ds.diff(revision='WTF', on_failure='ignore'), 1, status='impossible', message="fatal: bad revision 'WTF'") assert_result_count(ds.diff(), 0) # no diff assert_result_count(ds.diff(), 0) assert_result_count(ds.diff(revision='HEAD'), 0) # bogus path makes no difference assert_result_count(ds.diff(path='THIS', revision='HEAD'), 0) # comparing to a previous state we should get a diff in most cases # for this test, let's not care what exactly it is -- will do later assert len(ds.diff(revision='HEAD~1')) > 0 # let's introduce a known change create_tree(ds.path, {'new': 'empty'}) ds.add('.', to_git=True) ok_clean_git(ds.path) res = ds.diff(revision='HEAD~1') assert_result_count(res, 1) assert_result_count(res, 1, action='diff', path=opj(ds.path, 'new'), state='added') # we can also find the diff without going through the dataset explicitly with chpwd(ds.path): assert_result_count(diff(revision='HEAD~1'), 1, action='diff', path=opj(ds.path, 'new'), state='added') # no diff against HEAD assert_result_count(ds.diff(), 0) # modify known file create_tree(ds.path, {'new': 'notempty'}) for diffy in (None, 'HEAD'): res = ds.diff(revision=diffy) assert_result_count(res, 1) assert_result_count(res, 1, action='diff', path=opj(ds.path, 'new'), state='modified') # but if we give another path, it doesn't show up assert_result_count(ds.diff('otherpath'), 0) # giving the right path must work though assert_result_count(ds.diff('new'), 1, action='diff', path=opj(ds.path, 'new'), state='modified') # stage changes ds.add('.', to_git=True, save=False) # no diff, because we staged the modification assert_result_count(ds.diff(), 0) # but we can get at it assert_result_count(ds.diff(staged=True), 1, action='diff', path=opj(ds.path, 'new'), state='modified') # OR assert_result_count(ds.diff(revision='HEAD'), 1, action='diff', path=opj(ds.path, 'new'), state='modified') ds.save() ok_clean_git(ds.path) # untracked stuff create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}}) # a plain diff should report the untracked file # but not directly, because the parent dir is already unknown res = ds.diff() assert_result_count(res, 1) assert_result_count(res, 1, state='untracked', type='directory', path=opj(ds.path, 'deep')) # report of individual files is also possible assert_result_count(ds.diff(report_untracked='all'), 2, state='untracked', type='file') # an unmatching path will hide this result assert_result_count(ds.diff(path='somewhere'), 0) # perfect match and anything underneath will do assert_result_count(ds.diff(path='deep'), 1, state='untracked', path=opj(ds.path, 'deep'), type='directory') assert_result_count(ds.diff(path='deep'), 1, state='untracked', path=opj(ds.path, 'deep')) # now we stage one of the two files in deep ds.add(opj('deep', 'down2'), to_git=True, save=False) # without any reference it will ignore the staged stuff and report the remaining # untracked file assert_result_count(ds.diff(), 1, state='untracked', path=opj(ds.path, 'deep', 'down'), type='file') res = ds.diff(staged=True) assert_result_count(res, 1, state='untracked', path=opj(ds.path, 'deep', 'down'), type='file') assert_result_count(res, 1, state='added', path=opj(ds.path, 'deep', 'down2'), type='file')
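# --- Illustrative sketch (not part of the test suite) ---
# Hedged sketch of the revision-based diff API this test relies on (datalad
# versions prior to the fr/to interface); a temporary dataset is used and the
# file name is made up.
import tempfile
from os.path import join as opj
from datalad.api import Dataset

ds = Dataset(opj(tempfile.mkdtemp(), 'ds')).create()
(ds.pathobj / 'new').write_text('empty')
ds.save(message='add new file', to_git=True)
ds.diff()                    # nothing: the worktree is clean against HEAD
ds.diff(revision='HEAD~1')   # the added file is reported against the parent commit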
def test_diff_recursive(path): ds = Dataset(path).create() sub = ds.create('sub') # look at the last change, and confirm a dataset was added res = ds.diff(fr=DEFAULT_BRANCH + '~1', to=DEFAULT_BRANCH, result_renderer=None) assert_result_count(res, 1, action='diff', state='added', path=sub.path, type='dataset') # now recursive res = ds.diff(recursive=True, fr=DEFAULT_BRANCH + '~1', to=DEFAULT_BRANCH, result_renderer=None) # we also get the entire diff of the subdataset from scratch assert_status('ok', res) ok_(len(res) > 3) # one specific test assert_result_count(res, 1, action='diff', state='added', path=op.join(sub.path, '.datalad', 'config')) # now we add a file to just the parent create_tree(ds.path, { 'onefile': 'tobeadded', 'sub': { 'twofile': 'tobeadded' } }) res = ds.diff(recursive=True, untracked='all', result_renderer=None) assert_result_count(_dirty_results(res), 3) assert_result_count(res, 1, action='diff', state='untracked', path=op.join(ds.path, 'onefile'), type='file') assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset') assert_result_count(res, 1, action='diff', state='untracked', path=op.join(sub.path, 'twofile'), type='file') # intentional save in two steps to make check below easier ds.save('sub', recursive=True) ds.save() assert_repo_status(ds.path) head_ref = DEFAULT_BRANCH if ds.repo.is_managed_branch() else 'HEAD' # look at the last change, only one file was added res = ds.diff(fr=head_ref + '~1', to=head_ref, result_renderer=None) assert_result_count(_dirty_results(res), 1) assert_result_count(res, 1, action='diff', state='added', path=op.join(ds.path, 'onefile'), type='file') # now the exact same thing with recursion, must not be different from the # call above res = ds.diff(recursive=True, fr=head_ref + '~1', to=head_ref, result_renderer=None) assert_result_count(_dirty_results(res), 1) # last change in parent assert_result_count(res, 1, action='diff', state='added', path=op.join(ds.path, 'onefile'), type='file') if ds.repo.is_managed_branch(): raise SkipTest( "Test assumption broken: https://github.com/datalad/datalad/issues/3818" ) # one further back brings in the modified subdataset, and the added file # within it res = ds.diff(recursive=True, fr=head_ref + '~2', to=head_ref, result_renderer=None) assert_result_count(_dirty_results(res), 3) assert_result_count(res, 1, action='diff', state='added', path=op.join(ds.path, 'onefile'), type='file') assert_result_count(res, 1, action='diff', state='added', path=op.join(sub.path, 'twofile'), type='file') assert_result_count(res, 1, action='diff', state='modified', path=sub.path, type='dataset')
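# --- Illustrative sketch (not part of the test suite) ---
# A minimal sketch of a recursive diff over a revision range with the fr/to
# interface used above (datalad >= 0.12); the file and subdataset names are
# made up.
import tempfile
from os.path import join as opj
from datalad.api import Dataset

ds = Dataset(opj(tempfile.mkdtemp(), 'ds')).create()
sub = ds.create('sub')
(ds.pathobj / 'onefile').write_text('tobeadded')
ds.save(recursive=True, message='add onefile')
for r in ds.diff(fr='HEAD~1', to='HEAD', recursive=True):
    print(r['state'], r['path'])   # e.g. 'added' for the new file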
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub1_pub, sub2_pub): # we will be publishing back to origin, so to not alter testrepo # we will first clone it origin = install(origin_path, source=pristine_origin, recursive=True) # prepare src source = install(src_path, source=origin.path, recursive=True) # we will be trying to push into this later on, need to give permissions... origin_sub2 = Dataset(opj(origin_path, '2')) origin_sub2.config.set('receive.denyCurrentBranch', 'updateInstead', where='local') ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution #os.remove(opj(origin_sub2.path, '.git')) #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git')) # create plain git at target: target = GitRepo(dst_path, create=True) target.checkout("TMP", ["-b"]) source.repo.add_remote("target", dst_path) # subdatasets have no remote yet, so recursive publishing should fail: res = publish(dataset=source, to="target", recursive=True, on_failure='ignore') assert_result_count(res, 3) assert_result_count(res, 1, status='ok', type='dataset', path=source.path) assert_result_count(res, 2, status='error', message=("Unknown target sibling '%s' for publication", 'target')) # now, set up targets for the submodules: sub1_target = GitRepo(sub1_pub, create=True) sub1_target.checkout("TMP", ["-b"]) sub2_target = AnnexRepo(sub2_pub, create=True) # we will be testing presence of the file content, so let's make it progress sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead', where='local') sub1 = GitRepo(opj(src_path, 'subm 1'), create=False) sub2 = GitRepo(opj(src_path, '2'), create=False) sub1.add_remote("target", sub1_pub) sub2.add_remote("target", sub2_pub) # publish recursively with swallow_logs(new_level=logging.DEBUG) as cml: res = publish(dataset=source, to="target", recursive=True) assert_not_in('forced update', cml.out, "we probably haven't merged git-annex before pushing") # testing result list # base dataset was already published above, notneeded again assert_status(('ok', 'notneeded'), res) # nothing failed assert_result_count(res, 3, type='dataset') eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path}) eq_(list(target.get_branch_commits("master")), list(source.repo.get_branch_commits("master"))) eq_(list(target.get_branch_commits("git-annex")), list(source.repo.get_branch_commits("git-annex"))) eq_(list(sub1_target.get_branch_commits("master")), list(sub1.get_branch_commits("master"))) eq_(list(sub1_target.get_branch_commits("git-annex")), list(sub1.get_branch_commits("git-annex"))) eq_(list(sub2_target.get_branch_commits("master")), list(sub2.get_branch_commits("master"))) eq_(list(sub2_target.get_branch_commits("git-annex")), list(sub2.get_branch_commits("git-annex"))) # we are tracking origin but origin has different git-annex, since we # cloned from it, so it is not aware of our git-annex neq_(list(origin.repo.get_branch_commits("git-annex")), list(source.repo.get_branch_commits("git-annex"))) # So if we first publish to it recursively, we would update # all sub-datasets since git-annex branch would need to be pushed res_ = publish(dataset=source, recursive=True) assert_result_count(res_, 1, status='ok', path=source.path) assert_result_count(res_, 1, status='ok', path=sub1.path) assert_result_count(res_, 1, status='ok', path=sub2.path) # and now should carry the same state for git-annex eq_(list(origin.repo.get_branch_commits("git-annex")), list(source.repo.get_branch_commits("git-annex"))) # 
test for publishing with --since. By default since no changes, nothing pushed res_ = publish(dataset=source, recursive=True) assert_result_count(res_, 3, status='notneeded', type='dataset') # still nothing gets pushed, because origin is up to date res_ = publish(dataset=source, recursive=True, since='HEAD^') assert_result_count(res_, 3, status='notneeded', type='dataset') # and we should not fail if we run it from within the dataset with chpwd(source.path): res_ = publish(recursive=True, since='HEAD^') assert_result_count(res_, 3, status='notneeded', type='dataset') # Let's now update one subm with open(opj(sub2.path, "file.txt"), 'w') as f: f.write('') # add to subdataset, does not alter super dataset! # MIH: use `to_git` because original test author used # and explicit `GitRepo.add` -- keeping this for now Dataset(sub2.path).add('file.txt', to_git=True) # Let's now update one subm create_tree(sub2.path, {'file.dat': 'content'}) # add to subdataset, without reflecting the change in its super(s) Dataset(sub2.path).add('file.dat') # note: will publish to origin here since that is what it tracks res_ = publish(dataset=source, recursive=True, on_failure='ignore') ## only updates published, i.e. just the subdataset, super wasn't altered ## nothing copied! assert_status(('ok', 'notneeded'), res_) assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset') assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file') # since published to origin -- destination should not get that file nok_(lexists(opj(sub2_target.path, 'file.dat'))) res_ = publish(dataset=source, to='target', recursive=True) assert_status(('ok', 'notneeded'), res_) assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset') assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file') # Note: with updateInstead only in target2 and not saving change in # super-dataset we would have made remote dataset, if we had entire # hierarchy, to be somewhat inconsistent. # But here, since target datasets are independent -- it is ok # and the file itself was transferred ok_(lexists(opj(sub2_target.path, 'file.dat'))) nok_(sub2_target.file_has_content('file.dat')) ## but now we can redo publish recursively, with explicitly requested data transfer res_ = publish(dataset=source, to='target', recursive=True, transfer_data='all') ok_(sub2_target.file_has_content('file.dat')) assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat')) # Let's save those present changes and publish while implying "since last # merge point" source.save(message="Changes in subm2") # and test if it could deduce the remote/branch to push to source.config.set('branch.master.remote', 'target', where='local') with chpwd(source.path): res_ = publish(since='', recursive=True) # TODO: somehow test that there were no even attempt to diff within "subm 1" # since if `--since=''` worked correctly, nothing has changed there and it # should have not been even touched assert_status(('ok', 'notneeded'), res_) assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
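# --- Illustrative sketch (not part of the test suite) ---
# Hedged sketch of --since-limited recursive publication as exercised above,
# using the older `publish` API (current datalad favours `push`); the dataset
# path and the 'target' sibling are hypothetical and assumed to be configured
# already.
from datalad.api import Dataset

source = Dataset('/path/to/source')              # hypothetical
source.publish(to='target', recursive=True)      # publish whatever is out of date
source.publish(to='target', recursive=True,
               since='HEAD^')                    # only consider changes since HEAD^
# per the test above, an empty --since implies "since the last merge point"
source.publish(to='target', recursive=True, since='')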
def test_force_datatransfer(srcpath, dstpath): src = Dataset(srcpath).create() target = mk_push_target(src, 'target', dstpath, annex=True, bare=True) (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.") src.save(to_git=False, message="New annex file") assert_repo_status(src.path, annex=True) whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0] res = src.push(to='target', force='no-datatransfer') # nothing reported to be copied assert_not_in_results(res, action='copy') # we got the git-push nevertheless eq_(src.repo.get_hexsha('master'), target.get_hexsha('master')) # nothing moved eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0]) # now a push without forced no-transfer # we do not give since, so the non-transfered file is picked up # and transferred res = src.push(to='target', force=None) # no branch change, done before assert_in_results(res, action='publish', status='notneeded', refspec='refs/heads/master:refs/heads/master') # but availability update assert_in_results(res, action='publish', status='ok', refspec='refs/heads/git-annex:refs/heads/git-annex') assert_in_results(res, status='ok', path=str(src.pathobj / 'test_mod_annex_file'), action='copy') # whereis info reflects the change ok_( len(whereis_prior) < len( src.repo.whereis(files=['test_mod_annex_file'])[0])) # do it yet again will do nothing, because all is uptodate assert_status('notneeded', src.push(to='target', force=None)) # an explicit reference point doesn't change that assert_status('notneeded', src.push(to='target', force=None, since='HEAD~1')) # now force data transfer res = src.push(to='target', force='datatransfer') # no branch change, done before assert_in_results(res, action='publish', status='notneeded', refspec='refs/heads/master:refs/heads/master') # no availability update assert_in_results(res, action='publish', status='notneeded', refspec='refs/heads/git-annex:refs/heads/git-annex') # but data transfer assert_in_results(res, status='ok', path=str(src.pathobj / 'test_mod_annex_file'), action='copy') # force data transfer, but data isn't available src.repo.drop('test_mod_annex_file') res = src.push(to='target', path='.', force='datatransfer', on_failure='ignore') assert_in_results(res, status='impossible', path=str(src.pathobj / 'test_mod_annex_file'), action='copy', message='Slated for transport, but no content present')
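# --- Illustrative sketch (not part of the test suite) ---
# A brief sketch of the push force modes covered above; 'target' is an assumed,
# already-configured sibling and the dataset path is hypothetical.
from datalad.api import Dataset

src = Dataset('/path/to/src')                    # hypothetical
src.push(to='target', force='no-datatransfer')   # update the git branches, skip annex copy
src.push(to='target')                            # default: also copy annexed content not yet present
src.push(to='target', force='datatransfer')      # re-attempt the copy even if it looks complete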
def _test_bare_git_version_2(host, dspath, store): # Similarly to test_bare_git_version_1, this should ensure a bare git repo # at the store location for a dataset doesn't conflict with the ORA remote. # Note: Usability of git remote by annex depends on dataset layout version # (dirhashlower vs. -mixed). # For version 2 (mixed) upload via ORA and consumption via git should # work. But not the other way around, since git-annex uses # dirhashlower with bare repos. ds_path = Path(dspath) store = Path(store) ds = Dataset(ds_path).create() populate_dataset(ds) ds.save() bare_repo_path, _, _ = get_layout_locations(1, store, ds.id) # Use git to make sure the remote end is what git thinks a bare clone of it # should look like subprocess.run([ 'git', 'clone', '--bare', quote_cmdlinearg(str(dspath)), quote_cmdlinearg(str(bare_repo_path)) ]) if host: url = "ria+ssh://{host}{path}".format(host=host, path=store) else: url = "ria+{}".format(store.as_uri()) init_opts = common_init_opts + ['url={}'.format(url)] # set up store: io = SSHRemoteIO(host) if host else LocalIO() create_store(io, store, '1') # set up the dataset location, too. # Note: Dataset layout version 2 (dirhash mixed): create_ds_in_store(io, store, ds.id, '2', '1') # Now, let's have the bare repo as a git remote git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \ if host else bare_repo_path.as_uri() ds.repo.add_remote('bare-git', git_url) ds.repo.enable_remote('bare-git') # and the ORA remote in addition: ds.repo.init_remote('ora-remote', options=init_opts) # upload keys via ORA: ds.repo.copy_to('.', 'ora-remote') # bare-git doesn't know yet: eq_(len(ds.repo.whereis('one.txt')), 2) # fsck to make availability known assert_status('ok', [ annexjson2result(r, ds) for r in ds.repo.fsck(remote='bare-git', fast=True) ]) eq_(len(ds.repo.whereis('one.txt')), 3) ds.drop('.') eq_(len(ds.repo.whereis('one.txt')), 2) # actually consumable via git remote: ds.repo.call_annex(['move', 'one.txt', '--from', 'bare-git']) eq_(len(ds.repo.whereis('one.txt')), 2) # now, move back via git - shouldn't be consumable via ORA ds.repo.call_annex(['move', 'one.txt', '--to', 'bare-git']) # fsck to make availability known, but there's nothing from POV of ORA: fsck_res = [ annexjson2result(r, ds) for r in ds.repo.fsck(remote='ora-remote', fast=True) ] assert_result_count(fsck_res, 1, status='error', message='** Based on the location log, one.txt\n' '** was expected to be present, ' 'but its content is missing.') assert_result_count(fsck_res, 1, status='ok') eq_(len(fsck_res), 2) eq_(len(ds.repo.whereis('one.txt')), 1)
def test_dicom(path): ds = Dataset(path).create() ds.config.add('datalad.metadata.nativetype', 'dicom', where='dataset') copy( op.join(op.dirname(op.dirname(op.dirname(__file__))), 'tests', 'data', 'files', 'dicom.dcm'), path) ds.save() ok_clean_git(ds.path) res = ds.aggregate_metadata() assert_status('ok', res) # query for the file metadata res = ds.metadata('dicom.dcm') assert_result_count(res, 1) # from this extractor meta = res[0]['metadata']['dicom'] assert_in('@context', meta) # no point in testing ALL keys, but we got plenty assert (len(meta.keys()) > 70) eq_(meta['SeriesDate'], '20070205') # Actually a tricky one of the dcm.multival.MultiValue type # which we should extract as a list # https://github.com/datalad/datalad-neuroimaging/issues/49 eq_(meta['ImageType'], ['ORIGINAL', 'PRIMARY', 'EPI', 'NONE']) # make sure we have PatientName -- this is not using a basic data type, but # dicom.valuerep.PersonName3 -- conversion should have handled that # we can only test if the key is there, the source dicom has an empty # string as value eq_(meta['PatientName'], '') # now ask for the dataset metadata, which should have both the unique props # and a list of imageseries (one in this case, but a list) res = ds.metadata(reporton='datasets') assert_result_count(res, 1) dsmeta = res[0]['metadata']['dicom'] # same context assert_dict_equal(meta['@context'], dsmeta['@context']) meta.pop('@context') seriesmeta = dsmeta['Series'] eq_(seriesmeta[0].pop('SeriesDirectory'), op.curdir) eq_(dsmeta['Series'], [meta]) # for this artificial case pretty much the same info also comes out as # unique props, but wrapped in lists ucp = res[0]['metadata']["datalad_unique_content_properties"]['dicom'] assert_dict_equal( { k: [v] for k, v in dsmeta['Series'][0].items() if k not in DicomExtractor._unique_exclude and k in ucp }, { k: v for k, v in ucp.items() if k not in DicomExtractor._unique_exclude }) # buuuut, if we switch off file-based metadata storage ds.config.add('datalad.metadata.aggregate-content-dicom', 'false', where='dataset') ds.aggregate_metadata() res = ds.metadata(reporton='datasets') if not datalad_extracts_annex_key: # the auto-uniquified bits are gone but the Series description stays assert_not_in("datalad_unique_content_properties", res[0]['metadata']) eq_(dsmeta['Series'], [meta])
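# --- Illustrative sketch (not part of the test suite) ---
# A short sketch of the metadata workflow used above, with the older built-in
# metadata commands (`aggregate_metadata`/`metadata`); it assumes the 'dicom'
# extractor from the datalad-neuroimaging extension is installed, and the
# dataset path and file name are hypothetical.
from datalad.api import Dataset

ds = Dataset('/path/to/ds')                                            # hypothetical
ds.config.add('datalad.metadata.nativetype', 'dicom', where='dataset')
ds.save(message='enable dicom metadata extraction')
ds.aggregate_metadata()
file_meta = ds.metadata('dicom.dcm')            # per-file report
ds_meta = ds.metadata(reporton='datasets')      # aggregated dataset-level report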
def _test_bare_git_version_1(host, dspath, store): # This test should take a dataset and create a bare repository at the remote # end from it. # Given, that it is placed correctly within a tree of dataset, that remote # thing should then be usable as an ora-remote as well as as a git-type # remote. # Note: Usability of git remote by annex depends on dataset layout version # (dirhashlower vs. -mixed). # For version 1 (lower) upload and consumption should be # interchangeable. It doesn't matter which remote is used for what # direction. ds_path = Path(dspath) store = Path(store) ds = Dataset(ds_path).create() populate_dataset(ds) ds.save() bare_repo_path, _, _ = get_layout_locations(1, store, ds.id) # Use git to make sure the remote end is what git thinks a bare clone of it # should look like subprocess.run([ 'git', 'clone', '--bare', quote_cmdlinearg(str(dspath)), quote_cmdlinearg(str(bare_repo_path)) ]) if host: url = "ria+ssh://{host}{path}".format(host=host, path=store) else: url = "ria+{}".format(store.as_uri()) init_opts = common_init_opts + ['url={}'.format(url)] # set up store: io = SSHRemoteIO(host) if host else LocalIO() create_store(io, store, '1') # set up the dataset location, too. # Note: Dataset layout version 1 (dirhash lower): create_ds_in_store(io, store, ds.id, '1', '1') # Now, let's have the bare repo as a git remote and use it with annex git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \ if host else bare_repo_path.as_uri() ds.repo.add_remote('bare-git', git_url) ds.repo.enable_remote('bare-git') # copy files to the remote ds.repo.copy_to('.', 'bare-git') eq_(len(ds.repo.whereis('one.txt')), 2) # now we can drop all content locally, reobtain it, and survive an # fsck ds.drop('.') ds.get('.') assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()]) # Now, add the ora remote: ds.repo.init_remote('ora-remote', options=init_opts) # fsck to make availability known assert_status('ok', [ annexjson2result(r, ds) for r in ds.repo.fsck(remote='ora-remote', fast=True) ]) eq_(len(ds.repo.whereis('one.txt')), 3) # Now move content from git-remote to local and see it not being available # via bare-git anymore. ds.repo.call_annex(['move', '--all', '--from=bare-git']) # ora-remote doesn't know yet: eq_(len(ds.repo.whereis('one.txt')), 2) # But after fsck it does: fsck_res = [ annexjson2result(r, ds) for r in ds.repo.fsck(remote='ora-remote', fast=True) ] assert_result_count(fsck_res, 1, status='error', message='** Based on the location log, one.txt\n' '** was expected to be present, ' 'but its content is missing.') assert_result_count(fsck_res, 1, status='error', message='** Based on the location log, subdir/two\n' '** was expected to be present, ' 'but its content is missing.') eq_(len(ds.repo.whereis('one.txt')), 1) # and the other way around: upload via ora-remote and have it available via # git-remote: ds.repo.copy_to('.', 'ora-remote') # fsck to make availability known assert_status('ok', [ annexjson2result(r, ds) for r in ds.repo.fsck(remote='bare-git', fast=True) ]) eq_(len(ds.repo.whereis('one.txt')), 3)
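# --- Illustrative sketch (not part of the test suite) ---
# The tests above wire up the ORA special remote and the store layout by hand;
# a hedged sketch of the user-facing route (datalad >= 0.13) is
# create-sibling-ria. The store URL and sibling name are hypothetical, and
# newer datalad versions may additionally require new_store_ok=True when the
# store does not exist yet.
from datalad.api import Dataset

ds = Dataset('/path/to/ds')                                    # hypothetical
ds.create_sibling_ria('ria+file:///data/store', name='store')
ds.push(to='store')       # publishes the git history and the annexed content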
def test_plugin_call(path, dspath): # make plugins create_tree( path, { 'dlplugin_dummy.py': dummy_plugin, 'dlplugin_nodocs.py': nodocs_plugin, 'dlplugin_broken.py': broken_plugin, }) fake_dummy_spec = { 'dummy': {'file': opj(path, 'dlplugin_dummy.py')}, 'nodocs': {'file': opj(path, 'dlplugin_nodocs.py')}, 'broken': {'file': opj(path, 'dlplugin_broken.py')}, } with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec): with swallow_outputs() as cmo: plugin(showplugininfo=True) # hyphen spacing depends on the longest plugin name! # sorted # summary list generation doesn't actually load plugins for speed, # hence broken is not known to be broken here eq_(cmo.out, "broken [no synopsis] ({})\ndummy - real dummy ({})\nnodocs [no synopsis] ({})\n".format( fake_dummy_spec['broken']['file'], fake_dummy_spec['dummy']['file'], fake_dummy_spec['nodocs']['file'])) with swallow_outputs() as cmo: plugin(['dummy'], showpluginhelp=True) eq_(cmo.out.rstrip(), "Usage: dummy(dataset, noval, withval='test')\n\nmydocstring") with swallow_outputs() as cmo: plugin(['nodocs'], showpluginhelp=True) eq_(cmo.out.rstrip(), "Usage: nodocs()\n\nThis plugin has no documentation") # loading fails, no docs assert_raises(ValueError, plugin, ['broken'], showpluginhelp=True) # assume this most obscure plugin name is not used assert_raises(ValueError, plugin, '32sdfhvz984--^^') # broken plugin argument, must match Python keyword arg # specs assert_raises(ValueError, plugin, ['dummy', '1245']) def fake_is_installed(*args, **kwargs): return True with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec), \ patch('datalad.distribution.dataset.Dataset.is_installed', return_value=True): # does not trip over unsupported arguments, they get filtered out, because # we carry all kinds of stuff with swallow_logs(new_level=logging.WARNING) as cml: res = list(plugin(['dummy', 'noval=one', 'obscure=some'])) assert_status('ok', res) cml.assert_logged( msg=".*Ignoring plugin argument\\(s\\).*obscure.*, not supported by plugin.*", regex=True, level='WARNING') # fails on missing positional arg assert_raises(TypeError, plugin, ['dummy']) # positional and kwargs actually make it into the plugin res = list(plugin(['dummy', 'noval=one', 'withval=two']))[0] eq_('one', res['args']['noval']) eq_('two', res['args']['withval']) # kwarg defaults are preserved res = list(plugin(['dummy', 'noval=one']))[0] eq_('test', res['args']['withval']) # repeated specification yields list input res = list(plugin(['dummy', 'noval=one', 'noval=two']))[0] eq_(['one', 'two'], res['args']['noval']) # can do the same thing while bypassing argument parsing for calls # from within python, and even preserve native python dtypes res = list(plugin(['dummy', ('noval', 1), ('noval', 'two')]))[0] eq_([1, 'two'], res['args']['noval']) # and we can further simplify in this case by passing lists right # away res = list(plugin(['dummy', ('noval', [1, 'two'])]))[0] eq_([1, 'two'], res['args']['noval']) # dataset arg handling # run plugin that needs a dataset where there is none with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec): ds = None with chpwd(dspath): assert_raises(ValueError, plugin, ['dummy', 'noval=one']) # create a dataset here, fixes the error ds = create() res = list(plugin(['dummy', 'noval=one']))[0] # gives dataset instance eq_(ds, res['args']['dataset']) # now do again, giving the dataset path # but careful, `dataset` is a proper argument res = list(plugin(['dummy', 'noval=one'], dataset=dspath))[0] eq_(ds, 
res['args']['dataset']) # however, if passed alongside the plugins args it also works res = list(plugin(['dummy', 'dataset={}'.format(dspath), 'noval=one']))[0] eq_(ds, res['args']['dataset']) # but if both are given, the proper args takes precedence assert_raises(ValueError, plugin, ['dummy', 'dataset={}'.format(dspath), 'noval=one'], dataset='rubbish')
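# --- Illustrative sketch (not part of the test suite) ---
# Hedged sketch of calling a plugin through the older `plugin` entry point this
# test targets, assuming it is exposed as datalad.api.plugin in that era;
# plugin name, arguments, and dataset path are hypothetical.
from datalad.api import plugin

results = list(plugin(['dummy', 'noval=one', 'withval=two'], dataset='/path/to/ds'))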
def test_get_recurse_subdatasets(src, path): ds = install(path, source=src, result_xfm='datasets', return_type='item-or-list') # ask for the two subdatasets specifically. This will obtain them, # but not any content of any files in them subds1, subds2 = ds.get(['subm 1', '2'], get_data=False, description="youcouldnotmakethisup", result_xfm='datasets') for d in (subds1, subds2): eq_(d.repo.get_description(), 'youcouldnotmakethisup') # there are 3 files to get: test-annex.dat within each dataset: rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat') rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat') annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2} # None of them is currently present: ok_(ds.repo.file_has_content('test-annex.dat') is False) ok_(subds1.repo.file_has_content('test-annex.dat') is False) ok_(subds2.repo.file_has_content('test-annex.dat') is False) assert_repo_status(subds1.path) # explicitly given path in subdataset => implicit recursion: # MIH: Nope, we fulfill the dataset handle, but that doesn't # imply fulfilling all file handles result = ds.get(rel_path_sub1, recursive=True) # the subdataset was already present assert_in_results(result, type='dataset', path=subds1.path, status='notneeded') # we got the file assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok') ok_(subds1.repo.file_has_content('test-annex.dat') is True) # drop it: subds1.repo.drop('test-annex.dat') ok_(subds1.repo.file_has_content('test-annex.dat') is False) # now, with a path not explicitly pointing within a # subdataset, but recursive option: # get everything: result = ds.get(recursive=True, result_filter=lambda x: x.get('type') != 'dataset') assert_status('ok', result) eq_( set([ item.get('path')[len(ds.path) + 1:] for item in result if item['type'] == 'file' ]), annexed_files) ok_(ds.repo.file_has_content('test-annex.dat') is True) ok_(subds1.repo.file_has_content('test-annex.dat') is True) ok_(subds2.repo.file_has_content('test-annex.dat') is True) # drop them: ds.repo.drop('test-annex.dat') subds1.repo.drop('test-annex.dat') subds2.repo.drop('test-annex.dat') ok_(ds.repo.file_has_content('test-annex.dat') is False) ok_(subds1.repo.file_has_content('test-annex.dat') is False) ok_(subds2.repo.file_has_content('test-annex.dat') is False) # now, the very same call, but without recursive: result = ds.get('.', recursive=False) assert_status('ok', result) # no duplicate reporting on subdataset install and annex-get of its # directory eq_(len(result), 1) assert_result_count(result, 1, path=opj(ds.path, 'test-annex.dat'), status='ok') ok_(ds.repo.file_has_content('test-annex.dat') is True) ok_(subds1.repo.file_has_content('test-annex.dat') is False) ok_(subds2.repo.file_has_content('test-annex.dat') is False)
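# --- Illustrative sketch (not part of the test suite) ---
# A compact sketch of the get semantics tested above; the clone location,
# source URL, subdataset, and file names are all hypothetical.
from datalad.api import install

ds = install('/tmp/clone', source='https://example.com/super.git')   # hypothetical source
ds.get('subds', get_data=False)   # install the subdataset, but fetch no file content
ds.get('subds/file.dat')          # installs intermediate subdatasets as needed, then fetches the file
ds.get('.', recursive=True)       # everything, across all subdatasets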
def check_push(annex, src_path, dst_path): # prepare src src = Dataset(src_path).create(annex=annex) src_repo = src.repo # push should not add branches to the local dataset orig_branches = src_repo.get_branches() assert_not_in('synced/master', orig_branches) res = src.push(on_failure='ignore') assert_result_count(res, 1) assert_in_results( res, status='impossible', message='No push target given, and none could be auto-detected, ' 'please specific via --to') eq_(orig_branches, src_repo.get_branches()) # target sibling target = mk_push_target(src, 'target', dst_path, annex=annex) eq_(orig_branches, src_repo.get_branches()) res = src.push(to="target") eq_(orig_branches, src_repo.get_branches()) assert_result_count(res, 2 if annex else 1) assert_in_results(res, action='publish', status='ok', target='target', refspec='refs/heads/master:refs/heads/master', operations=['new-branch']) assert_repo_status(src_repo, annex=annex) eq_(list(target.get_branch_commits_("master")), list(src_repo.get_branch_commits_("master"))) # configure a default merge/upstream target src.config.set('branch.master.remote', 'target', where='local') src.config.set('branch.master.merge', 'master', where='local') # don't fail when doing it again, no explicit target specification # needed anymore res = src.push() eq_(orig_branches, src_repo.get_branches()) # and nothing is pushed assert_status('notneeded', res) assert_repo_status(src_repo, annex=annex) eq_(list(target.get_branch_commits_("master")), list(src_repo.get_branch_commits_("master"))) # some modification: (src.pathobj / 'test_mod_file').write_text("Some additional stuff.") src.save(to_git=True, message="Modified.") (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.") src.save(to_git=not annex, message="Modified again.") assert_repo_status(src_repo, annex=annex) res = src.push(to='target', since="HEAD~2", jobs=2) assert_in_results( res, action='publish', status='ok', target='target', refspec='refs/heads/master:refs/heads/master', # we get to see what happened operations=['fast-forward']) if annex: # we got to see the copy result for the annexed files assert_in_results(res, action='copy', status='ok', path=str(src.pathobj / 'test_mod_annex_file')) # we published, so we can drop and reobtain ok_(src_repo.file_has_content('test_mod_annex_file')) src_repo.drop('test_mod_annex_file') ok_(not src_repo.file_has_content('test_mod_annex_file')) src_repo.get('test_mod_annex_file') ok_(src_repo.file_has_content('test_mod_annex_file')) ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file', 'Heavy stuff.') eq_(list(target.get_branch_commits_("master")), list(src_repo.get_branch_commits_("master"))) if not (annex and src_repo.is_managed_branch()): # the following doesn't make sense in managed branches, because # a commit that could be amended is no longer the last commit # of a branch after a sync has happened (which did happen # during the last push above # amend and change commit msg in order to test for force push: src_repo.commit("amended", options=['--amend']) # push should be rejected (non-fast-forward): res = src.push(to='target', since='HEAD~2', on_failure='ignore') # fails before even touching the annex branch assert_result_count(res, 1) assert_in_results(res, action='publish', status='error', target='target', refspec='refs/heads/master:refs/heads/master', operations=['rejected', 'error']) # push with force=True works: res = src.push(to='target', since='HEAD~2', force='gitpush') assert_in_results(res, action='publish', status='ok', target='target', 
refspec='refs/heads/master:refs/heads/master', operations=['forced-update']) eq_(list(target.get_branch_commits_("master")), list(src_repo.get_branch_commits_("master"))) # we do not have more branches than we had in the beginning # in particular no 'synced/master' eq_(orig_branches, src_repo.get_branches())
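# --- Illustrative sketch (not part of the test suite) ---
# Hedged sketch of the plain push flow checked above: register a sibling, push,
# and force-push after history was rewritten; sibling name and paths are made up.
from datalad.api import Dataset

src = Dataset('/path/to/src')                            # hypothetical
src.siblings('add', name='target', url='/path/to/mirror')
src.push(to='target')                                    # initial publication
# ... amend or otherwise rewrite the last commit ...
src.push(to='target', force='gitpush')                   # overrides the rejected non-fast-forward push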