def check_renamed_file(recursive, no_annex, path):
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds.save(recursive=recursive)
    assert_repo_status(path)

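# The check_* helpers here and in the variants below receive their trailing
# `path` argument from datalad's @with_tempfile decorator and are driven by a
# nose-style generator test. A minimal sketch of such a driver, assuming the
# usual datalad.tests.utils imports (with_tempfile, create_tree,
# assert_repo_status, ...) are in scope; the parameter grid is illustrative:
def test_renamed_file():
    for recursive in (False, True):
        for no_annex in (True, False):
            # check_renamed_file is assumed to be decorated with
            # @with_tempfile(), which supplies the final `path` argument
            yield check_renamed_file, recursive, no_annex
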
def test_delete(client):
    client, ds = client
    with client as c:
        assert client.delete('/api/v1/file').status_code == 401
        assert c.get('/api/v1/auth').status_code == 200
        # missing path
        assert client.delete('/api/v1/file').status_code == 400
        testpath = 'subdir/dummy'
        file_content = '{"three": 3}'
        # resource picks up live changes to the dataset
        create_tree(ds.path, {'subdir': {'dummy': file_content}})
        ds.save()
        assert testpath in c.get('/api/v1/file').get_json()['files']
        rq = c.delete(
            '/api/v1/file',
            data=json.dumps(dict(
                path=testpath,
                verify_availability=False,
            )),
            content_type='application/json',
        ).get_json()
        if ds.config.obtain('datalad.repo.direct', False):
            # https://github.com/datalad/datalad/issues/2836
            return
        assert_result_count(rq, 1, action='remove', status='ok', path=testpath)
        assert testpath not in c.get('/api/v1/file').get_json()['files']

def test_clone_into_dataset(source, top_path):
    ds = create(top_path)
    assert_repo_status(ds.path)
    subds = ds.clone(source, "sub",
                     result_xfm='datasets', return_type='item-or-list')
    ok_((subds.pathobj / '.git').is_dir())
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is clean:
    assert_repo_status(ds.path, annex=None)
    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.clone(source, "sub2",
                      result_xfm='datasets', return_type='item-or-list')
    eq_(subds_.pathobj, ds.pathobj / "sub2")  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])

def test_install_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)
    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)
    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])

def test_clone_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)
    subds = ds.clone(source, "sub",
                     result_xfm='datasets', return_type='item-or-list')
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is clean:
    ok_clean_git(ds.path, annex=None)
    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.clone(source, "sub2",
                      result_xfm='datasets', return_type='item-or-list')
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

def test_publish_gh1691(origin, src_path, dst_path):
    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())
    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)
    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset',
                        path=source.path)
    # if, however, a non-installed subdataset is requested explicitly,
    # it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset',
                        action='publish')

def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")
    # Let's also do it in a subdirectory as CWD, and check that relative paths
    # given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")
        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)
    assert_repo_status(dspath)

def setup_class(cls):
    mktmp_kws = get_tempfile_kwargs()
    path = tempfile.mkdtemp(**mktmp_kws)
    create_tree(
        path,
        {"udir": {x + ".dat" + ver: x + " content"
                  for x in "abcd"
                  for ver in ["", ".v1"]}})
    cls._hpath = HTTPPath(path)
    cls._hpath.start()
    cls.url = cls._hpath.url
    cls.json_file = tempfile.mktemp(suffix=".json", **mktmp_kws)
    with open(cls.json_file, "w") as jfh:
        json.dump(
            [{"url": cls.url + "udir/a.dat", "name": "a", "subdir": "foo"},
             {"url": cls.url + "udir/b.dat", "name": "b", "subdir": "bar"},
             {"url": cls.url + "udir/c.dat", "name": "c", "subdir": "foo"}],
            jfh)

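# A matching teardown is implied but not shown above; a minimal sketch,
# assuming HTTPPath exposes a stop() counterpart to start() (as in datalad's
# test utilities) and that the temporary JSON file should be removed:
def teardown_class(cls):
    cls._hpath.stop()       # shut down the ad-hoc test HTTP server
    os.unlink(cls.json_file)  # drop the temporary URL manifest
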
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any
    # untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save())
    ok_clean_git(parent.path, untracked=['untracked'])
    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])

def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)
    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])
    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)

def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save("committing", path="./mike1")
    # Let's also do in subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds.save("committing", path="./mike2")
        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds.save("committing", path=later)
    ok_clean_git(dspath)

def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })
    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds._save("committing", path="./mike1")
    # Let's also do in subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds._save("committing", path="./mike2")
        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds._save("committing", path=later)
    ok_clean_git(dspath)

def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    ok_clean_git(parent.path)
    sub1 = parent.create(opj('down', 'sub1'))
    ok_clean_git(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    ok_clean_git(parent.path, index_modified=['sub2'])
    res = parent.save()
    ok_clean_git(parent.path)
    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    ok_clean_git(parent.path, index_modified=['sub2'])
    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.add('.', recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=opj(subsub.path, 'new'), action='add', status='ok')
    # sub1 is untouched, and not reported
    assert_result_count(res, 0, path=sub1.path)
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    ok_clean_git(parent.path)

def check_renamed_file(recursive, annex, path):
    ds = Dataset(path).create(annex=annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo.call_git(["mv"], files=["old", "new"])
    ds.save(recursive=recursive)
    assert_repo_status(path)

def check_renamed_file(recursive, no_annex, path):
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.add('old')
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds._save(recursive=recursive)
    ok_clean_git(path)

def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless='auto')
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless='auto')
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'),
                  reckless='auto')
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless='auto')
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless='auto')
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless='auto')
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'),
                  reckless='auto')
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless='auto')
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'),
                  reckless='auto')
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless='auto')
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless='auto')
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless='auto')

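# Hypothetical usage sketch (the target directory is illustrative): build the
# whole mockup, then list what landed in the 'public' superdataset.
make_studyforrest_mockup('/tmp/sf-mockup')
print(Dataset(opj('/tmp/sf-mockup', 'public')).subdatasets(
    result_xfm='relpaths'))
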
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless=True)
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'),
                  reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'),
                  reckless=True)
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless=True)

def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using
    # the current user's group
    ds.create_sibling(target_url, name=remote, shared='group',
                      group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)
    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)
    target_sub = Dataset(opj(target_path, 'sub'))
    # since we have not yet used an option to record publishing to that
    # sibling by default (e.g. --set-upstream), running just ds.publish
    # should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    # should be ok, non recursive; BUT it (git or us?) would create an
    # empty sub/ directory
    ds.publish(to=remote)
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1, status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote),
        standardgroup or '')
    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))

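# Sketch of the nose-style driver this underscore-prefixed helper implies,
# assuming its two path arguments are injected by @with_tempfile decorators
# and that datalad's skip_ssh guard (skips unless SSH tests are enabled) is
# imported; the parameter values are illustrative:
def test_target_ssh_inherit():
    yield skip_ssh(_test_target_ssh_inherit), None      # no standard group
    yield skip_ssh(_test_target_ssh_inherit), 'backup'  # group that copies content
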
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))

def test_add_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.add("blah", message="me", message_file="and me")
    create_tree(path, {"foo": "x", "msg": u"add β"})
    ds.add("foo", message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.format_commit("%s"), u"add β")

def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')
    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))
    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')

def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")
    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    eq_(ds.repo.repo.git.show("--format=%s", "--no-patch"), "add foo")

def test_preserve_attrs(src, dest):
    create_tree(src, {"src": {"foo": {"bar": "This is test text."}}})
    os.utime(opj(src, "src", "foo", "bar"), (1234567890, 1234567890))
    _RunnerAdapter().put(opj(src, "src"), dest, recursive=True,
                         preserve_attrs=True)
    s = os.stat(opj(dest, "src", "foo", "bar"))
    assert s.st_atime == 1234567890
    assert s.st_mtime == 1234567890
    with open(opj(dest, "src", "foo", "bar")) as fp:
        assert fp.read() == "This is test text."

def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(IncompleteResultsError):
        ds.save("blah", message="me", message_file="and me")
    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.add("foo", save=False)
    ds.save(message_file=opj(ds.path, "msg"))
    assert_equal(ds.repo.repo.git.show("--format=%s", "--no-patch"),
                 "add foo")

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)
    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)
    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)
    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))
    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))
    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)
    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')

def test_read(client):
    client, ds = client
    with client as c:
        assert c.get('/api/v1/auth').status_code == 200
        existing_files = c.get('/api/v1/file').get_json()['files']
        file_content = '{"three": 3}'
        # resource picks up live changes to the dataset
        create_tree(ds.path, {'subdir': {'dummy': file_content}})
        ds.save()
        current_files = c.get('/api/v1/file').get_json()['files']
        testpath = 'subdir/dummy'
        assert testpath not in existing_files
        assert testpath in current_files
        # simple path filtering
        assert c.get('/api/v1/file/*dummy').get_json()['files'] == [testpath]
        # request file content in various supported ways
        for a, kwa in (
                # plain URL routing
                (('/api/v1/file/subdir/dummy',), {}),
                # URL arg
                (('/api/v1/file?path=subdir%2Fdummy',), {}),
                # form data
                (('/api/v1/file',), {'data': {'path': testpath}}),
                (('/api/v1/file',), {'data': json.dumps(dict(path=testpath)),
                                     'content_type': 'application/json'}),
        ):
            rq = c.get(*a, **kwa)
            assert rq.status_code == 200
            assert rq.get_json()['path'] == testpath
            assert rq.get_json()['content'] == file_content
        for j, target in (
                ('no', file_content),
                # JSON decoding
                ('yes', {'three': 3}),
                # JSON stream decoding
                ('stream', [{'three': 3}]),
        ):
            assert c.get(
                '/api/v1/file',
                data=json.dumps(dict(path=testpath, json=j)),
                content_type='application/json',
            ).get_json()['content'] == target

def test_run_explicit(path): ds = Dataset(path) assert_false(ds.repo.file_has_content("test-annex.dat")) create_tree(ds.path, { "dirt_untracked": "untracked", "dirt_modified": "modified" }) ds.save("dirt_modified", to_git=True) with open(op.join(path, "dirt_modified"), "a") as ofh: ofh.write(", more") # We need explicit=True to run with dirty repo. assert_status( "impossible", ds.run("cat test-annex.dat test-annex.dat >doubled.dat", inputs=["test-annex.dat"], on_failure="ignore")) hexsha_initial = ds.repo.get_hexsha() # If we specify test-annex.dat as an input, it will be retrieved before the # run. ds.run("cat test-annex.dat test-annex.dat >doubled.dat", inputs=["test-annex.dat"], explicit=True) ok_(ds.repo.file_has_content("test-annex.dat")) # We didn't commit anything because outputs weren't specified. assert_false(ds.repo.file_has_content("doubled.dat")) eq_(hexsha_initial, ds.repo.get_hexsha()) # If an input doesn't exist, we just show the standard warning. with swallow_logs(new_level=logging.WARN) as cml: with swallow_outputs(): ds.run("ls", inputs=["not-there"], explicit=True) assert_in("Input does not exist: ", cml.out) remove(op.join(path, "doubled.dat")) hexsha_initial = ds.repo.get_hexsha() ds.run("cat test-annex.dat test-annex.dat >doubled.dat", inputs=["test-annex.dat"], outputs=["doubled.dat"], explicit=True) ok_(ds.repo.file_has_content("doubled.dat")) assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked']) neq_(hexsha_initial, ds.repo.get_hexsha()) # Saving explicit outputs works from subdirectories. subdir = op.join(path, "subdir") mkdir(subdir) with chpwd(subdir): run("echo insubdir >foo", explicit=True, outputs=["foo"]) ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def get_baseline(p):
    ds = Dataset(p).create()
    with chpwd(ds.path):
        subds = create('sub')
    ds.add('sub', save=False)
    create_tree(subds.path, {"staged": ""})
    subds.add("staged", save=False)
    # subdataset has staged changes.
    ok_clean_git(ds.path, index_modified=['sub'])
    return ds

def test_save_partial_index(path): ds = Dataset(path).create(force=True) ds.add("foo") ok_clean_git(ds.path) ds.unlock(path="foo") create_tree(ds.path, tree={"foo": "a", "staged": ""}, remove_existing=True) ds.repo.add("staged", git=True) ds.save(path="foo") ok_clean_git(ds.path, head_modified=["staged"])
def test_no_interaction_with_untracked_content(path):
    # extracted from what was a metadata test originally
    ds = Dataset(opj(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    ok_(not exists(opj(subds.path, '.datalad', 'config')))
    # this will only work if `remove` didn't do anything stupid and
    # caused all content to be saved
    subds.create('subsub', force=True)

def test_save_partial_index(path): ds = Dataset(path).create(force=True) ds.add("foo") ok_clean_git(ds.path) ds.unlock(path="foo") create_tree(ds.path, tree={"foo": "a", "staged": ""}, remove_existing=True) ds.repo.add("staged", git=True) ds._save(path="foo") ok_clean_git(ds.path, head_modified=["staged"])
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)
    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception atm... so
    # just checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))
    # and one more test since in the above test it would not puke ATM but
    # just not even try to copy since it assumes that the file is already
    # there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)
    assert_postupdate_hooks(dst_path)

def test_external_versions_rogue_module(topd):
    ev = ExternalVersions()
    # if a module throws some other non-ImportError exception upon import,
    # we must not crash, but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
            swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)

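# For contrast, a sketch of a well-behaved query against the module-level
# singleton (assuming datalad's external_versions API): indexing returns a
# version for an importable module, or None when it is absent.
from datalad.support.external_versions import external_versions
assert external_versions['datalad'] is not None
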
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)
    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception atm... so
    # just checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))
    # and one more test since in the above test it would not puke ATM but
    # just not even try to copy since it assumes that the file is already
    # there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)
    assert_postupdate_hooks(dst_path)

def test_save_message_file(path):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")
    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    # ATTN: Use master explicitly so that this check works when we're on an
    # adjusted branch too (e.g., when this test is executed under Windows).
    eq_(ds.repo.format_commit("%s", "master"), "add foo")

def test_update_git_smoke(src_path, dst_path):
    # Apparently was just failing on git repos for basic lack of coverage,
    # hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True),
        1, status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')

def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')
    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section',
                                    'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))
    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')

def test_override_existing_under_git(self):
    create_tree(self.annex.path, {'1.dat': 'load2'})
    self.annex.add('1.dat', git=True)
    self.annex.commit('added to git')
    add_archive_content(
        '1.tar',
        annex=self.annex,
        strip_leading_dirs=True,
    )
    # and we did not bother adding it to annex (for now) -- just skipped
    # since we have it and it is the same
    ok_file_under_git(self.annex.path, '1.dat', annexed=False)
    # but if we say 'overwrite' -- we would remove and replace
    add_archive_content(
        '1.tar',
        annex=self.annex,
        strip_leading_dirs=True,
        delete=True,
        existing='overwrite',
    )
    ok_file_under_git(self.annex.path, '1.dat', annexed=True)

def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1, action='diff', state='added',
                        path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res, 1, action='diff', state='added',
                        path=opj(sub.path, '.datalad', 'config'))
    # now we add a file to just the parent
    create_tree(ds.path, {'onefile': 'tobeadded',
                          'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='untracked',
                        path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified',
                        path=sub.path, type='dataset')
    assert_result_count(res, 1, action='diff', state='untracked',
                        path=opj(sub.path, 'twofile'), type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff', state='added',
                        path=opj(ds.path, 'onefile'), type='file')
    # now the exact same thing with recursion, must not be different from
    # the call above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res, 1, action='diff', state='added',
                        path=opj(ds.path, 'onefile'), type='file')
    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res, 1, action='diff', state='added',
                        path=opj(ds.path, 'onefile'), type='file')
    assert_result_count(res, 1, action='diff', state='added',
                        path=opj(sub.path, 'twofile'), type='file')
    assert_result_count(res, 1, action='diff', state='modified',
                        path=sub.path, type='dataset')

def test_add_delete_after_and_drop_subdir(self):
    os.mkdir(opj(self.annex.path, 'subdir'))
    mv_out = self.annex._git_custom_command(
        [], ['git', 'mv', '1.tar', 'subdir'])
    self.annex.commit("moved into subdir")
    with chpwd(self.annex.path):
        # was failing since it deleted without considering whether the
        # tarball was extracted in that tarball's directory
        commits_prior_master = list(self.annex.get_branch_commits())
        commits_prior = list(self.annex.get_branch_commits('git-annex'))
        add_out = add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True)
        ok_clean_git(self.annex.path)
        commits_after_master = list(self.annex.get_branch_commits())
        commits_after = list(self.annex.get_branch_commits('git-annex'))
        # There should be a single commit for all additions +1 to
        # initiate datalad-archives gh-1258.  If faking dates,
        # there should be another +1 because annex.alwayscommit
        # isn't set to false.
        assert_equal(len(commits_after),
                     len(commits_prior) + 2 + self.annex.fake_dates_enabled)
        assert_equal(len(commits_after_master), len(commits_prior_master))
        assert add_out is self.annex
        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()
        # and if we add some untracked file and redo, there should be no
        # changes to master and the file should remain uncommitted
        create_tree(self.annex.path, {'dummy.txt': '123'})
        assert_true(self.annex.dirty)  # untracked file
        add_out = add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True,
            allow_dirty=True)
        ok_clean_git(self.annex.path, untracked=['dummy.txt'])
        assert_equal(len(list(self.annex.get_branch_commits())),
                     len(commits_prior_master))
        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()

def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        })
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)

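# The same git-vs-annex routing is also available one level up; a minimal,
# self-contained sketch using Dataset.save's documented to_git flag (the
# function name and file names below are illustrative):
def demo_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(ds.path, {'in_git.txt': 'git', 'in_annex.dat': 'annex'})
    ds.save(path=['in_git.txt'], to_git=True)  # force storage in plain git
    ds.save(path=['in_annex.dat'])             # let annex rules decide
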
def test_get_invalid_call(path, file_outside):
    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_raises(ValueError, get, None, dataset=path)
    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.add("some.txt", to_git=True)
    ds.save("Initial commit.")
    # make it an annex:
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    # skipped silently, but no result for this file:
    eq_(len(result), 0)
    # invalid source:
    # yoh: but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.add("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)
    # warning on a non-existing file:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.get("NotExistingFile.txt")
        result = cme.exception.results
        eq_(len(result), 0)
        assert_in("ignored non-existing paths", cml.out)
    # path outside the repo errors as with most other commands:
    assert_raises(ValueError, ds.get, file_outside)

def test_no_annex(path):
    ds = create(path)
    ok_clean_git(ds.path)
    create_tree(
        ds.path,
        {'code': {
            'inannex': 'content',
            'notinannex': 'othercontent'},
         'README': 'please'})
    # add inannex pre configuration
    ds.save(opj('code', 'inannex'))
    no_annex(pattern=['code/**', 'README'], dataset=ds)
    # add notinannex and README post configuration
    ds.save([opj('code', 'notinannex'), 'README'])
    ok_clean_git(ds.path)
    # one is annex'ed, the other is not, despite no change in the add call
    # importantly, also .gitattributes is not annexed
    eq_([opj('code', 'inannex')], ds.repo.get_annexed_files())

def test_failon_nodrop(path):
    # test to make sure that we do not wipe out data when checks are enabled
    # despite the general error behavior mode
    ds = Dataset(path).create()
    # we play with a subdataset to bypass the tests that prevent the removal
    # of top-level datasets
    sub = ds.create('sub')
    create_tree(sub.path, {'test': 'content'})
    ds.save(opj('sub', 'test'))
    ok_clean_git(ds.path)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # we put one file into the dataset's annex, no redundant copies
    # neither uninstall nor remove should work
    res = ds.uninstall('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))
    # same with remove
    res = ds.remove('sub', check=True, on_failure='ignore')
    assert_status(['error', 'impossible'], res)
    eq_(['test'], sub.repo.get_annexed_files(with_content_only=True))

def test_get_tags(path):
    from mock import patch
    gr = GitRepo(path, create=True)
    eq_(gr.get_tags(), [])
    eq_(gr.describe(), None)
    # Explicitly override the committer date because tests may set it to a
    # fixed value, but we want to check that the returned tags are sorted by
    # the committer date.
    with patch.dict("os.environ",
                    {"GIT_COMMITTER_DATE": "Thu, 07 Apr 2005 22:13:13 +0200"}):
        create_tree(gr.path, {'file': ""})
        gr.add('file')
        gr.commit(msg="msg")
        eq_(gr.get_tags(), [])
        eq_(gr.describe(), None)
        gr.tag("nonannotated")
        tags1 = [{'name': 'nonannotated', 'hexsha': gr.get_hexsha()}]
        eq_(gr.get_tags(), tags1)
        eq_(gr.describe(), None)
        eq_(gr.describe(tags=True), tags1[0]['name'])
    first_commit = gr.get_hexsha()
    with patch.dict("os.environ",
                    {"GIT_COMMITTER_DATE": "Fri, 08 Apr 2005 22:13:13 +0200"}):
        create_tree(gr.path, {'file': "123"})
        gr.add('file')
        gr.commit(msg="changed")
        gr.tag("annotated", message="annotation")
    tags2 = tags1 + [{'name': 'annotated', 'hexsha': gr.get_hexsha()}]
    eq_(gr.get_tags(), tags2)
    eq_(gr.describe(), tags2[1]['name'])
    # compare prev commit
    eq_(gr.describe(commitish=first_commit), None)
    eq_(gr.describe(commitish=first_commit, tags=True), tags1[0]['name'])

def test_get_invalid_call(path, file_outside):
    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible', get(None, dataset=path, on_failure='ignore'))
    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")
    # make it an annex (remove the indicator file that create has placed
    # in the dataset to make this possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)
    # invalid source:
    # yoh: but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)
    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)
    # path outside the repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(
        res, status='impossible',
        message='path not associated with any dataset')