Example #1
def test_download_url_need_datalad_remote(path=None):
    # publicly available (requires anonymous S3 access, so still needs our special remote)
    url = "s3://dandiarchive/ros3test.hdf5"
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a.download_url([url], path="foo")
    ds_b = clone(source=ds_a.path,
                 path=str(path / "b"),
                 result_xfm="datasets",
                 return_type="item-or-list")
    ds_b.repo.remove_remote(DEFAULT_REMOTE)
    ds_b.get("foo")
    ok_(ds_b.repo.file_has_content("foo"))
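
Distilled from the test above: download_url fetches a file into a dataset, registers its source URL with git-annex, and saves the result, which is why a clone can re-obtain the content even without the origin remote. A minimal sketch, assuming a hypothetical dataset path and URL:

from datalad.api import Dataset

# hypothetical path and URL, for illustration only
ds = Dataset("/tmp/demo_ds").create()
# download, annex, register the URL, and save in one step
ds.download_url(["https://example.com/data.bin"], path="data.bin")
assert ds.repo.file_has_content("data.bin")
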
Example #2
def test_download_url_archive_trailing_separator(toppath=None,
                                                 topurl=None,
                                                 path=None):
    ds = Dataset(path).create()
    # Archives will be extracted in the specified subdirectory, which doesn't
    # need to exist.
    ds.download_url([topurl + "a0.tar.gz"],
                    path=opj("with-slash", ""),
                    archive=True)
    ok_(ds.repo.file_has_content(opj("with-slash", "a0", "f0.txt")))
    # But if the path doesn't have a trailing separator, it will not be
    # considered a directory. The archive will be downloaded to that path and
    # then extracted at the top level of the dataset.
    ds.download_url([topurl + "a1.tar.gz"], path="no-slash", archive=True)
    ok_(ds.repo.file_has_content(opj("a1", "f1.txt")))
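
The rule this test exercises, in isolation: a path ending in the OS separator names a target directory (created on demand), while a path without one names the download target itself. A minimal sketch with hypothetical URLs:

from os.path import join as opj
from datalad.api import Dataset

ds = Dataset("/tmp/demo_archive_paths").create()
# trailing separator: download into "tarballs/" and extract there
ds.download_url(["https://example.com/a.tar.gz"],
                path=opj("tarballs", ""), archive=True)
# no separator: "b.tar.gz" is the downloaded file itself; extraction
# then happens at the top level of the dataset
ds.download_url(["https://example.com/b.tar.gz"],
                path="b.tar.gz", archive=True)
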
Example #3
def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decode/encode steps

    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note that we intentionally call annex commands instead of
    # datalad publish/get here, since we are testing an annex special remote.

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', str(file), '--to', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', str(file), '--from', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
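
The store setup above uses test-internal helpers (SSHRemoteIO, create_store, create_ds_in_store). Outside the test suite, a RIA/ORA sibling would typically be created via the public create-sibling-ria command; a rough sketch with a hypothetical store URL, assuming a datalad version that supports new_store_ok:

from datalad.api import Dataset

ds = Dataset("/tmp/demo_ria").create()
(ds.pathobj / "payload.dat").write_text("some payload\n")
ds.save(message="Add a file to ship to the store")
# hypothetical local store; ria+ssh://host/path works analogously
ds.create_sibling_ria("ria+file:///tmp/demo_store", name="store",
                      new_store_ok=True)
# exercise the special remote directly with plain annex calls,
# mirroring the intent of the test
ds.repo.call_annex(["move", "--to", "store", "payload.dat"])
ds.repo.call_annex(["get", "--from", "store", "payload.dat"])
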
Example #4
def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"), ds.repo.format_commit("%B"))
    # add-archive-content should yield an 'impossible' result when there is
    # untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"],
                        archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
        'unsaved changes')
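
The 'impossible' result reflects a general precondition: add-archive-content refuses to run on a dataset with unsaved changes. A defensive pattern, sketched with a hypothetical URL:

from datalad.api import Dataset

ds = Dataset("/tmp/demo_archive").create()
# make sure nothing is unsaved before extracting an archive
ds.save(message="Save pending changes")
ds.download_url(["https://example.com/bundle.tar.gz"], archive=True)
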
Example #5
def test_download_url_dataset(toppath=None, topurl=None, path=None):
    # Non-dataset directory.
    file1_fullpath = opj(path, "file1.txt")
    with chpwd(path):
        download_url(topurl + "file1.txt")
        ok_exists(file1_fullpath)
    os.remove(file1_fullpath)

    files_tosave = ['file1.txt', 'file2.txt']
    urls_tosave = [topurl + f for f in files_tosave]

    ds = Dataset(opj(path, "ds")).create()

    # By default, files are saved when called in a dataset.
    ds.download_url(urls_tosave)
    for fname in files_tosave:
        ok_(ds.repo.file_has_content(fname))

    eq_(ds.repo.get_urls("file1.txt"), [urls_tosave[0]])
    eq_(ds.repo.get_urls("file2.txt"), [urls_tosave[1]])

    ds.download_url([topurl + "file3.txt"], save=False)
    assert_false(ds.repo.file_has_content("file3.txt"))

    # Leading directories of the target path are created if needed.
    subdir_target = opj("l1", "l2", "f")
    ds.download_url([topurl + "file1.txt"], path=subdir_target)
    ok_(ds.repo.file_has_content(subdir_target))

    subdir_path = opj(ds.path, "subdir", "")
    os.mkdir(subdir_path)
    with chpwd(subdir_path):
        download_url(topurl + "file4.txt")
        download_url(topurl + "file5.txt", path="five.txt")
        ds.download_url(topurl + "file6.txt")
        download_url(topurl + "file7.txt", dataset=ds.path)
    # download_url calls within a subdirectory save the file there
    ok_(ds.repo.file_has_content(opj("subdir", "file4.txt")))
    ok_(ds.repo.file_has_content(opj("subdir", "five.txt")))
    # ... unless the dataset instance is provided
    ok_(ds.repo.file_has_content("file6.txt"))
    # ... but passing the dataset as a string (as it would arrive from the
    # command line) still uses CWD semantics
    ok_(ds.repo.file_has_content(opj("subdir", "file7.txt")))

    with chpwd(path):
        # We're in a non-dataset path and pass in a string as the dataset. The
        # path is taken as relative to the current working directory, so we get
        # an error when trying to save it.
        assert_in_results(download_url(topurl + "file8.txt",
                                       dataset=ds.path,
                                       on_failure="ignore"),
                          status="error",
                          action="status")
    assert_false((ds.pathobj / "file8.txt").exists())
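
Condensing the path-resolution rules exercised above: a bare download_url call resolves against the current working directory, a Dataset-bound call resolves against the dataset root, and passing the dataset as a string keeps CWD semantics. A sketch with hypothetical locations:

import os
from datalad.api import Dataset, download_url

ds = Dataset("/tmp/demo_cwd").create()
os.makedirs(ds.pathobj / "subdir", exist_ok=True)
os.chdir(ds.pathobj / "subdir")
# bare call: file lands in subdir/
download_url("https://example.com/f1.txt")
# bound call: file lands in the dataset root
ds.download_url("https://example.com/f2.txt")
# string dataset: CWD semantics again, file lands in subdir/
download_url("https://example.com/f3.txt", dataset=ds.path)
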
Example #6
def test_demo_repro_analysis(bids_path, ana_path, toolbox_url):

    import glob

    localizer_ds = Dataset(bids_path).create()
    localizer_ds.run_procedure('cfg_bids')

    # TODO: decorator
    # TODO: with config patch for toolbox ? -> overwrite?
    # localizer_ds.install(source="https://github.com/psychoinformatics-de/hirni-demo",
    #                      path="sourcedata",
    #                      recursive=True)
    with patch.dict('os.environ', {'DATALAD_HIRNI_TOOLBOX_URL': toolbox_url}):
        install_demo_dataset(localizer_ds, "sourcedata", recursive=True)

    assert_repo_status(localizer_ds.repo)
    subs = localizer_ds.subdatasets(recursive=True)
    assert_result_count(subs, 4)
    assert_result_count(subs, 1, path=op.join(localizer_ds.path, 'sourcedata'))
    assert_result_count(subs,
                        1,
                        path=op.join(localizer_ds.path, 'sourcedata', 'code',
                                     'hirni-toolbox'))
    assert_result_count(subs,
                        1,
                        path=op.join(localizer_ds.path, 'sourcedata', 'acq1',
                                     'dicoms'))
    assert_result_count(subs,
                        1,
                        path=op.join(localizer_ds.path, 'sourcedata', 'acq2',
                                     'dicoms'))

    localizer_ds.hirni_spec2bids(
        [op.join(localizer_ds.path, 'sourcedata', 'studyspec.json')] +
        glob.glob(
            op.join(localizer_ds.path, 'sourcedata', '*', 'studyspec.json')),
        anonymize=True)

    for f in [
            'sub-001',
            'task-oneback_bold.json',
            'participants.tsv',
            op.join('sub-001', 'sub-001_scans.tsv'),
            op.join('sub-001', 'anat'),
            op.join('sub-001', 'anat', 'sub-001_run-1_T1w.json'),
            op.join('sub-001', 'anat', 'sub-001_run-1_T1w.nii.gz'),
            op.join('sub-001', 'func'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_bold.json'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_bold.nii.gz'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_events.tsv'),
    ]:
        assert_true(op.lexists(op.join(localizer_ds.path, f)))

    analysis_ds = Dataset(ana_path).create()
    analysis_ds.install(source=localizer_ds.path,
                        path=op.join('inputs', 'rawdata'))

    analysis_ds.run_procedure('cfg_yoda')
    # download-url expects the target dir to exist
    (analysis_ds.pathobj / 'code').mkdir(exist_ok=True)
    analysis_ds.download_url(
        # TODO: file an issue; a relative path via the Python API bound
        # method doesn't work
        path=op.join(analysis_ds.path, 'code') + op.sep,
        urls=[
            'https://raw.githubusercontent.com/myyoda/ohbm2018-training/master/section23/scripts/events2ev3.sh',
            'https://raw.githubusercontent.com/myyoda/ohbm2018-training/master/section23/scripts/ffa_design.fsf'
        ])

    assert_repo_status(analysis_ds.repo)
    ok_file_under_git(op.join(analysis_ds.path, 'code'),
                      'events2ev3.sh',
                      annexed=False)
    ok_file_under_git(op.join(analysis_ds.path, 'code'),
                      'ffa_design.fsf',
                      annexed=False)

    analysis_ds.run(
        inputs=[op.join('inputs', 'rawdata', 'sub-001', 'func',
                        'sub-001_task-oneback_run-01_events.tsv')],
        outputs=[op.join('sub-001', 'onsets')],
        cmd='bash code/events2ev3.sh sub-001 {inputs}',
        message="Build FSL EV3 design files")

    # The remainder of this test is currently skipped until
    # datalad-containers #115 is solved.
    raise SkipTest("Solve datalad-containers #115")

    analysis_ds.containers_add('fsl',
                               url="shub://ReproNim/ohbm2018-training:fsln")
    #   % datalad containers-list

    analysis_ds.save(version_tag="ready4analysis")

    assert_repo_status(analysis_ds.repo)

    #

    analysis_ds.run(
        outputs=[op.join('sub-001', '1stlvl_design.fsf')],
        cmd="bash -c 'sed -e \"s,##BASEPATH##,{pwd},g\" "
            "-e \"s,##SUB##,sub-001,g\" code/ffa_design.fsf > {outputs}'",
        message="FSL FEAT analysis config script")

    assert_repo_status(analysis_ds.repo)
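
The datalad run calls in this walkthrough follow one pattern: declared inputs are retrieved before execution, declared outputs are unlocked, the {inputs}/{outputs}/{pwd} placeholders are expanded, and the command is recorded in the commit so it can be re-executed with datalad rerun. A minimal self-contained sketch with hypothetical file names:

from datalad.api import Dataset

ds = Dataset("/tmp/demo_run").create()
(ds.pathobj / "in.txt").write_text("data\n")
ds.save(message="Add input file")
# the command is recorded in the resulting commit, enabling
# `datalad rerun` to reproduce it later
ds.run(cmd="cp {inputs} {outputs}",
       inputs=["in.txt"],
       outputs=["out.txt"],
       message="Toy provenance-tracked step")
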
Example #7
def test_read_access(store_path=None, store_url=None, ds_path=None):

    ds = Dataset(ds_path).create()
    populate_dataset(ds)

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    fsck_results = ds.repo.fsck(remote='ora-remote', fast=True)
    # Note: failures in the special remote show up as a success=False result
    # for fsck; the call itself does not fail.
    for r in fsck_results:
        if "note" in r:
            # we could simply assert that "note" is not in r, but we want
            # proper error reporting: the content of the note, not just its
            # unexpected existence.
            assert_equal(r["success"],
                         "true",
                         msg="git-annex-fsck failed with ORA over HTTP: %s" %
                         r)
        assert_equal(r["error-messages"], [])
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list',
                             result_renderer='disabled')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list',
                            result_renderer='disabled')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:] / 'annex' /
                           'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 4)
    assert_result_count(res,
                        4,
                        status='ok',
                        type='file',
                        action='get',
                        message="from ora-remote...")

    # try whether the reported access URL is correct
    one_url = ds.repo.whereis('one.txt',
                              output='full')[store_uuid]['urls'].pop()
    assert_status(
        'ok', ds.download_url(urls=[one_url], path=str(ds.pathobj / 'dummy')))
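
The last assertion relies on git-annex having recorded a per-remote access URL for the key. With output='full', whereis returns one record per location UUID, including any registered URLs; a short sketch, assuming a hypothetical dataset that already annexed "one.txt":

from datalad.api import Dataset

ds = Dataset("/tmp/demo_urls")
# inspect where an annexed file is available and via which URLs
info = ds.repo.whereis("one.txt", output="full")
for uuid, record in info.items():
    # each record carries the remote's description and any known access URLs
    print(uuid, record.get("description"), record.get("urls", []))
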