Beispiel #1
0
def test_create_tree(path=None):
    """Verify create_tree materializes nested plain and compressed files.

    Lays out a top-level file ``1`` plus a subdirectory ``sd`` holding the
    same basename both plain and with several compression suffixes (the
    obscure case of ``1`` and ``1.gz`` coexisting), then checks each file's
    content -- decompressing where a suffix demands it.
    """
    content = u"мама мыла раму"
    create_tree(
        path,
        OrderedDict([
            ('1', content),
            (
                'sd',
                OrderedDict([
                    # right away an obscure case where we have both 1 and 1.gz
                    ('1', content * 2),
                    ('1.gz', content * 3),
                    ('1.xz', content * 4),
                    ('1.lzma', content * 5),
                ])),
        ]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content * 2)
    # each compressed variant must decompress back to its expected payload
    for multiplier, fname in ((3, '1.gz'), (4, '1.xz'), (5, '1.lzma')):
        ok_file_has_content(op.join(path, 'sd', fname),
                            content * multiplier,
                            decompress=True)
Beispiel #2
0
 def time_make_studyforrest_mockup(self):
     """Benchmark assembling a studyforrest-like superdataset hierarchy.

     Creates a 'public' umbrella dataset with interconnected subdatasets
     (raw acquisition phases, templates, aligned data, annotations,
     analyses) plus several private source datasets, cloning them into
     one another to mimic the real project's evolution.
     """
     path = self.path
     # Carries a copy of the
     # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup
     # as of 0.12.0rc2-76-g6ba6d53b
     # A copy is made so we do not reflect in the benchmark results changes
     # to that helper's code.  This copy only tests on 2 not 3 analyses
     # subds
     public = create(opj(path, 'public'), description="umbrella dataset")
     # the following tries to capture the evolution of the project
     phase1 = public.create('phase1',
                            description='old-style, no connection to RAW')
     structural = public.create('structural', description='anatomy')
     tnt = public.create('tnt', description='image templates')
     # reckless clones skip annex safety checks -- cheaper, which is fine
     # for a throwaway benchmark fixture
     tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
     tnt.clone(source=structural.path, path=opj('src', 'structural'), reckless=True)
     aligned = public.create('aligned', description='aligned image data')
     aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
     aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
     # new acquisition
     labet = create(opj(path, 'private', 'labet'), description="raw data ET")
     phase2_dicoms = create(opj(path, 'private', 'p2dicoms'), description="raw data P2MRI")
     phase2 = public.create('phase2',
                            description='new-style, RAW connection')
     phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
     phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'), reckless=True)
     # add to derivatives
     tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
     aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
     # never to be published media files
     media = create(opj(path, 'private', 'media'), description="raw data ET")
     # assuming all annotations are in one dataset (in reality this is also
     # a superdatasets with about 10 subdatasets
     annot = public.create('annotations', description='stimulus annotation')
     annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
     # a few typical analysis datasets
     # (just doing 2, actual status quo is just shy of 10)
     # and also the real goal -> meta analysis
     metaanalysis = public.create('metaanalysis', description="analysis of analyses")
     # NOTE(review): range(1, 2) yields a single iteration (i=1) although
     # the comments above speak of 2 analyses -- possibly range(1, 3) was
     # intended; confirm before changing (it would alter benchmark timings)
     for i in range(1, 2):
         ana = public.create('analysis{}'.format(i),
                             description='analysis{}'.format(i))
         ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
         ana.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
         ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
         # link to metaanalysis
         metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                            reckless=True)
         # simulate change in an input (but not raw) dataset
         create_tree(
             aligned.path,
             {'modification{}.txt'.format(i): 'unique{}'.format(i)})
         aligned.add('.')
     # finally aggregate data
     aggregate = public.create('aggregate', description='aggregate data')
     aggregate.clone(source=aligned.path, path=opj('src', 'aligned'), reckless=True)
Beispiel #3
0
def test_downloader_download(urlpath=None, url=None, path=None):
    """Check SHubDownloader fetches image data for a shub:// URL.

    ``urlpath``/``url`` point at a local mock of the Singularity Hub API;
    ``path`` is a scratch directory for download targets.
    """
    path = Path(path)
    downloader = SHubDownloader()
    downloader.api_url = url
    # mock API: the org/repo manifest points at <url>data, which serves "foo"
    create_tree(urlpath,
                tree={
                    "data": "foo",
                    "org": {
                        "repo":
                        '{{"name":"org/repo","image":"{}"}}'.format(url +
                                                                    "data")
                    }
                })

    target = str(path / "target")
    downloader.download("shub://org/repo", target)
    ok_file_has_content(target, "foo")

    # a repeated download with the same downloader instance must also work
    other_target = str(path / "other-target")
    downloader.download("shub://org/repo", other_target)
    # FIX: the original test never verified the second download's content,
    # so a broken re-download would have gone unnoticed
    ok_file_has_content(other_target, "foo")
Beispiel #4
0
def test_downloader_bad_json(urlpath=None, url=None):
    """An empty (hence unparseable) API response must raise DownloadError."""
    dl = SHubDownloader()
    dl.api_url = url
    # serve an empty body for org/repo -- JSON decoding of it must fail
    create_tree(urlpath, tree={"org": {"repo": ''}})
    with assert_raises(DownloadError):
        dl.download("shub://org/repo", urlpath)
Beispiel #5
0
    },
    'README.md': '',  # TODO
    'CHANGELOG.md': '',  # TODO
}

# unless taken care of by the template already, each item in here
# will get its own .gitattributes entry to keep it out of the annex
# give relative path to dataset root (use platform notation)
force_in_git = [
    'README.md',
    'CHANGELOG.md',
]

###################################################################
# actually dump everything into the dataset
create_tree(ds.path, tmpl)

# amend gitattributes: register each forced file in the .gitattributes
# next to it (falling back to the dataset root if its directory is absent)
for rel in force_in_git:
    target = op.join(ds.path, rel)
    parent = op.dirname(target)
    parent_exists = op.exists(parent)
    attrs_path = op.join(parent, '.gitattributes') if parent_exists \
        else op.join(ds.path, '.gitattributes')
    entry = op.relpath(target, start=parent) if parent_exists else rel
    with open(attrs_path, 'a') as fh:
        fh.write('{} annex.largefiles=nothing\n'.format(entry))
ds.add(
    path=[dict(path=ds.path, type='dataset', parentds=ds.path)],
Beispiel #6
0
 def time_make_studyforrest_mockup(self):
     """Benchmark assembling a studyforrest-like superdataset hierarchy.

     Builds a 'public' umbrella dataset with interconnected subdatasets
     (raw acquisition phases, templates, aligned data, annotations,
     analyses) plus a few private source datasets, cloning them into one
     another the way the real project evolved.
     """
     root = self.path

     # Local copy of
     # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup
     # (as of 0.12.0rc2-76-g6ba6d53b), duplicated so benchmark results
     # are not affected by changes to that helper.  Only 2 (not 3)
     # analysis subdatasets are generated here.

     def link(host, src, name):
         # clone `src` into host's src/ subtree; reckless clones skip
         # annex safety checks, which keeps the benchmark cheap
         host.clone(source=src.path, path=opj('src', name), reckless=True)

     top = create(opj(root, 'public'), description="umbrella dataset")
     # the following tries to capture the evolution of the project
     p1 = top.create('phase1',
                     description='old-style, no connection to RAW')
     anat = top.create('structural', description='anatomy')
     templates = top.create('tnt', description='image templates')
     link(templates, p1, 'phase1')
     link(templates, anat, 'structural')
     align = top.create('aligned', description='aligned image data')
     link(align, p1, 'phase1')
     link(align, templates, 'tnt')
     # new acquisition
     et_raw = create(opj(root, 'private', 'labet'),
                     description="raw data ET")
     p2_dicoms = create(opj(root, 'private', 'p2dicoms'),
                        description="raw data P2MRI")
     p2 = top.create('phase2',
                     description='new-style, RAW connection')
     link(p2, et_raw, 'labet')
     link(p2, p2_dicoms, 'dicoms')
     # add to derivatives
     link(templates, p2, 'phase2')
     link(align, p2, 'phase2')
     # never to be published media files
     media = create(opj(root, 'private', 'media'),
                    description="raw data ET")
     # assume all annotations live in a single dataset (in reality a
     # superdataset with about 10 subdatasets)
     annotations = top.create('annotations',
                              description='stimulus annotation')
     link(annotations, media, 'media')
     # a couple of typical analysis datasets feeding the real goal:
     # a meta analysis (actual status quo is just shy of 10)
     meta = top.create('metaanalysis',
                       description="analysis of analyses")
     # NOTE(review): range(1, 2) runs a single iteration (i=1) although
     # comments mention 2 analyses -- confirm intent before changing
     for idx in range(1, 2):
         analysis = top.create('analysis{}'.format(idx),
                               description='analysis{}'.format(idx))
         link(analysis, annotations, 'annot')
         link(analysis, align, 'aligned')
         link(analysis, templates, 'tnt')
         # register with the meta analysis
         link(meta, analysis, 'ana{}'.format(idx))
         # simulate change in an input (but not raw) dataset
         create_tree(align.path,
                     {'modification{}.txt'.format(idx):
                      'unique{}'.format(idx)})
         align.add('.')
     # finally aggregate data
     agg = top.create('aggregate', description='aggregate data')
     link(agg, align, 'aligned')
Beispiel #7
0
    },
    'README.md': README_top,
    'CHANGELOG.md': '',  # TODO
}

# unless taken care of by the template already, each item in here
# will get its own .gitattributes entry to keep it out of the annex
# give relative path to dataset root (use platform notation)
force_in_git = [
    'README.md',
    'CHANGELOG.md',
]

###################################################################
# actually dump everything into the dataset
create_tree(ds.path, tmpl)

# all code goes into Git
ds.repo.set_gitattributes(
    [('*', {'annex.largefiles': 'nothing'})],
    op.join('code', '.gitattributes'))

# amend gitattributes: force the listed files into Git (out of the annex)
ds.repo.set_gitattributes(
    [(rel, {'annex.largefiles': 'nothing'}) for rel in force_in_git])

# leave clean
# TODO only commit actually changed/added files
ds.save(message="Apply YODA dataset setup")