def test_create_tree(path=None):
    """Verify that create_tree materializes nested and compressed files.

    Builds a tree containing a plain file plus a subdirectory where the
    same basename exists both uncompressed and with .gz/.xz/.lzma
    suffixes, then checks every file (decompressing where needed) for the
    expected content.
    """
    content = u"мама мыла раму"
    # right away an obscure case where we have both 1 and 1.gz in the
    # same directory
    subdir = OrderedDict([
        ('1', content * 2),
        ('1.gz', content * 3),
        ('1.xz', content * 4),
        ('1.lzma', content * 5),
    ])
    create_tree(path, OrderedDict([('1', content), ('sd', subdir)]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content * 2)
    # compressed variants carry distinct multiples so a mixup is caught
    for ext, factor in (('gz', 3), ('xz', 4), ('lzma', 5)):
        ok_file_has_content(
            op.join(path, 'sd', '1.' + ext), content * factor,
            decompress=True)
def time_make_studyforrest_mockup(self): path = self.path # Carries a copy of the # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup # as of 0.12.0rc2-76-g6ba6d53b # A copy is made so we do not reflect in the benchmark results changes # to that helper's code. This copy only tests on 2 not 3 analyses # subds public = create(opj(path, 'public'), description="umbrella dataset") # the following tries to capture the evolution of the project phase1 = public.create('phase1', description='old-style, no connection to RAW') structural = public.create('structural', description='anatomy') tnt = public.create('tnt', description='image templates') tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True) tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True) aligned = public.create('aligned', description='aligned image data') aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True) aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True) # new acquisition labet = create(opj(path, 'private', 'labet'), description="raw data ET") phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI") phase2 = public.create('phase2', description='new-style, RAW connection') phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True) phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True) # add to derivatives tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True) aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True) # never to be published media files media = create(opj(path, 'private', 'media'), description="raw data ET") # assuming all annotations are in one dataset (in reality this is also # a superdatasets with about 10 subdatasets annot = public.create('annotations', description='stimulus annotation') annot.clone(source=media.path, path=opj('src', 'media'), reckless=True) # a few typical analysis datasets 
# (just doing 2, actual status quo is just shy of 10) # and also the real goal -> meta analysis metaanalysis = public.create('metaanalysis', description="analysis of analyses") for i in range(1, 2): ana = public.create('analysis{}'.format(i), description='analysis{}'.format(i)) ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True) ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True) ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True) # link to metaanalysis metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)), reckless=True) # simulate change in an input (but not raw) dataset create_tree( aligned.path, {'modification{}.txt'.format(i): 'unique{}'.format(i)}) aligned.add('.') # finally aggregate data aggregate = public.create('aggregate', description='aggregate data') aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
def test_downloader_download(urlpath=None, url=None, path=None):
    """Download a Singularity Hub image twice via SHubDownloader.

    Serves a mocked API response from *urlpath*, points the downloader's
    api_url at *url*, and fetches the same shub:// record into two
    distinct targets under *path*, checking the first one's content.
    """
    base = Path(path)
    downloader = SHubDownloader()
    downloader.api_url = url
    # mocked API record whose "image" field points back at our fake server
    record = '{{"name":"org/repo","image":"{}"}}'.format(url + "data")
    create_tree(
        urlpath,
        tree={"data": "foo", "org": {"repo": record}})
    target = str(base / "target")
    downloader.download("shub://org/repo", target)
    ok_file_has_content(target, "foo")
    # a second download of the same record into a fresh target
    other_target = str(base / "other-target")
    downloader.download("shub://org/repo", other_target)
def test_downloader_bad_json(urlpath=None, url=None):
    """A malformed (empty) API response must raise DownloadError."""
    downloader = SHubDownloader()
    downloader.api_url = url
    # an empty string at org/repo is not parseable as a JSON record
    create_tree(urlpath, tree={"org": {"repo": ''}})
    with assert_raises(DownloadError):
        downloader.download("shub://org/repo", urlpath)
}, 'README.md': '', # TODO 'CHANGELOG.md': '', # TODO } # unless taken care of by the template already, each item in here # will get its own .gitattributes entry to keep it out of the annex # give relative path to dataset root (use platform notation) force_in_git = [ 'README.md', 'CHANGELOG.md', ] ################################################################### # actually dump everything into the dataset create_tree(ds.path, tmpl) # amend gitattributes for path in force_in_git: abspath = op.join(ds.path, path) d = op.dirname(abspath) ga_path = op.join(d, '.gitattributes') \ if op.exists(d) else op.join(ds.path, '.gitattributes') with open(ga_path, 'a') as gaf: gaf.write('{} annex.largefiles=nothing\n'.format( op.relpath(abspath, start=d) if op.exists(d) else path)) # leave clean # TODO only commit actually changed/added files ds.add( path=[dict(path=ds.path, type='dataset', parentds=ds.path)],
def time_make_studyforrest_mockup(self): path = self.path # Carries a copy of the # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup # as of 0.12.0rc2-76-g6ba6d53b # A copy is made so we do not reflect in the benchmark results changes # to that helper's code. This copy only tests on 2 not 3 analyses # subds public = create(opj(path, 'public'), description="umbrella dataset") # the following tries to capture the evolution of the project phase1 = public.create('phase1', description='old-style, no connection to RAW') structural = public.create('structural', description='anatomy') tnt = public.create('tnt', description='image templates') tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True) tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True) aligned = public.create('aligned', description='aligned image data') aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True) aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True) # new acquisition labet = create(opj(path, 'private', 'labet'), description="raw data ET") phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI") phase2 = public.create('phase2', description='new-style, RAW connection') phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True) phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True) # add to derivatives tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True) aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True) # never to be published media files media = create(opj(path, 'private', 'media'), description="raw data ET") # assuming all annotations are in one dataset (in reality this is also # a superdatasets with about 10 subdatasets annot = public.create('annotations', description='stimulus annotation') annot.clone(source=media.path, path=opj('src', 'media'), reckless=True) # a few typical analysis datasets 
# (just doing 2, actual status quo is just shy of 10) # and also the real goal -> meta analysis metaanalysis = public.create('metaanalysis', description="analysis of analyses") for i in range(1, 2): ana = public.create('analysis{}'.format(i), description='analysis{}'.format(i)) ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True) ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True) ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True) # link to metaanalysis metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)), reckless=True) # simulate change in an input (but not raw) dataset create_tree(aligned.path, {'modification{}.txt'.format(i): 'unique{}'.format(i)}) aligned.add('.') # finally aggregate data aggregate = public.create('aggregate', description='aggregate data') aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
}, 'README.md': README_top, 'CHANGELOG.md': '', # TODO } # unless taken care of by the template already, each item in here # will get its own .gitattributes entry to keep it out of the annex # give relative path to dataset root (use platform notation) force_in_git = [ 'README.md', 'CHANGELOG.md', ] ################################################################### # actually dump everything into the dataset create_tree(ds.path, tmpl) # all code goes into Git ds.repo.set_gitattributes([('*', {'annex.largefiles': 'nothing'})], op.join('code', '.gitattributes')) # amend gitattributes ds.repo.set_gitattributes( [(p, {'annex.largefiles': 'nothing'}) for p in force_in_git]) # leave clean # TODO only commit actually changed/added files ds.save( message="Apply YODA dataset setup", )