def test_download_url_need_datalad_remote(path=None):
    # publicly available (requires anonymous s3 access, so still needs our
    # special remote)
    url = "s3://dandiarchive/ros3test.hdf5"
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a.download_url([url], path="foo")

    ds_b = clone(source=ds_a.path, path=str(path / "b"),
                 result_xfm="datasets", return_type="item-or-list")
    ds_b.repo.remove_remote(DEFAULT_REMOTE)
    ds_b.get("foo")
    ok_(ds_b.repo.file_has_content("foo"))
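# A note on the signature above: `path=None` is DataLad's convention for
# parameters that a decorator fixture fills in at run time. The decorators
# themselves are not part of this excerpt; a plausible wiring -- assumed,
# with helper names from datalad.tests.utils_pytest -- would be:
#
#     @skip_if_no_network        # needs anonymous S3 access to dandiarchive
#     @with_tempfile(mkdir=True) # injects a fresh temp dir as `path`
#     def test_download_url_need_datalad_remote(path=None):
#         ...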
def test_download_url_archive_trailing_separator(toppath=None, topurl=None,
                                                 path=None):
    ds = Dataset(path).create()
    # Archives will be extracted in the specified subdirectory, which doesn't
    # need to exist.
    ds.download_url([topurl + "a0.tar.gz"], path=opj("with-slash", ""),
                    archive=True)
    ok_(ds.repo.file_has_content(opj("with-slash", "a0", "f0.txt")))

    # But if the path doesn't have a trailing separator, it will not be
    # considered a directory. The archive will be downloaded to that path and
    # then extracted in the top level of the dataset.
    ds.download_url([topurl + "a1.tar.gz"], path="no-slash", archive=True)
    ok_(ds.repo.file_has_content(opj("a1", "f1.txt")))
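# The opj(<dir>, "") idiom above is how the test produces a path with a
# trailing separator. A minimal, self-contained illustration (not part of
# the test suite; pytest won't collect the underscore-prefixed function):
def _sketch_trailing_separator():
    import os
    from os.path import join as opj

    # joining with a trailing empty component appends the platform
    # separator -- "with-slash/" on POSIX -- which is what signals
    # "treat this as a directory" to download_url
    assert opj("with-slash", "") == "with-slash" + os.sep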
def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decode/encode etc.
    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note that we intentionally call annex commands instead of
    # datalad-publish/-get here. We are testing an annex special remote.
    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(file)
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', file, '--to', 'store'])
    known_sources = ds.repo.whereis(file)
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', file, '--from', 'store'])
    known_sources = ds.repo.whereis(file)
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
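# _test_binary_data() reads like a shared, parametrized helper (the leading
# underscore keeps pytest from collecting it): host=None exercises a local
# store via LocalIO, a hostname exercises SSHRemoteIO. The real wrappers are
# not part of this excerpt; a plausible local-store wrapper, with decorator
# names assumed from datalad.tests.utils_pytest, would be:
#
#     @with_tempfile(mkdir=True)
#     @with_tempfile(mkdir=True)
#     def test_binary_data_local(store=None, dspath=None):
#         _test_binary_data(None, store, dspath)  # host=None -> LocalIO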
def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"),
                  ds.repo.format_commit("%B"))

    # add-archive-content should yield an 'impossible' result when there is
    # untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"], archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
                'unsaved changes')
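# assert_in_results() above performs a subset match: it succeeds if at least
# one of the returned result records carries all of the given key/value
# pairs. A self-contained sketch of that matching logic (hypothetical
# reimplementation, not part of the test suite):
def _sketch_result_matching():
    results = [
        {'status': 'ok', 'action': 'download_url'},
        {'status': 'impossible', 'action': 'add-archive-content'},
    ]
    wanted = {'status': 'impossible', 'action': 'add-archive-content'}
    assert any(all(r.get(k) == v for k, v in wanted.items())
               for r in results)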
def test_download_url_dataset(toppath=None, topurl=None, path=None):
    # Non-dataset directory.
    file1_fullpath = opj(path, "file1.txt")
    with chpwd(path):
        download_url(topurl + "file1.txt")
        ok_exists(file1_fullpath)
    os.remove(file1_fullpath)

    files_tosave = ['file1.txt', 'file2.txt']
    urls_tosave = [topurl + f for f in files_tosave]

    ds = Dataset(opj(path, "ds")).create()
    # By default, files are saved when called in a dataset.
    ds.download_url(urls_tosave)
    for fname in files_tosave:
        ok_(ds.repo.file_has_content(fname))

    eq_(ds.repo.get_urls("file1.txt"), [urls_tosave[0]])
    eq_(ds.repo.get_urls("file2.txt"), [urls_tosave[1]])

    ds.download_url([topurl + "file3.txt"], save=False)
    assert_false(ds.repo.file_has_content("file3.txt"))

    # Leading paths for the target are created if needed.
    subdir_target = opj("l1", "l2", "f")
    # Note: concatenate rather than opj() -- URLs always use forward slashes.
    ds.download_url([topurl + "file1.txt"], path=subdir_target)
    ok_(ds.repo.file_has_content(subdir_target))

    subdir_path = opj(ds.path, "subdir", "")
    os.mkdir(subdir_path)
    with chpwd(subdir_path):
        download_url(topurl + "file4.txt")
        download_url(topurl + "file5.txt", path="five.txt")
        ds.download_url(topurl + "file6.txt")
        download_url(topurl + "file7.txt", dataset=ds.path)
    # download_url calls within a subdirectory save the file there
    ok_(ds.repo.file_has_content(opj("subdir", "file4.txt")))
    ok_(ds.repo.file_has_content(opj("subdir", "five.txt")))
    # ... unless the dataset instance is provided
    ok_(ds.repo.file_has_content("file6.txt"))
    # ... but a string for the dataset (as it would be from the command line)
    # still uses CWD semantics
    ok_(ds.repo.file_has_content(opj("subdir", "file7.txt")))

    with chpwd(path):
        # We're in a non-dataset path and pass in a string as the dataset.
        # The path is taken as relative to the current working directory, so
        # we get an error when trying to save it.
        assert_in_results(
            download_url(topurl + "file8.txt", dataset=ds.path,
                         on_failure="ignore"),
            status="error",
            action="status")
        assert_false((ds.pathobj / "file8.txt").exists())
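# The CWD semantics exercised above hinge on chpwd from datalad.utils, a
# small context manager that switches the working directory and restores it
# on exit. A minimal, self-contained sketch (not part of the test suite):
def _sketch_chpwd():
    import os
    import tempfile
    from datalad.utils import chpwd

    before = os.getcwd()
    with tempfile.TemporaryDirectory() as tmp:
        with chpwd(tmp):
            # inside the block the process CWD is the (resolved) temp dir
            assert os.getcwd() == os.path.realpath(tmp)
    # restored on exit
    assert os.getcwd() == before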
def test_demo_repro_analysis(bids_path, ana_path, toolbox_url):
    import glob

    localizer_ds = Dataset(bids_path).create()
    localizer_ds.run_procedure('cfg_bids')

    # TODO: decorator
    # TODO: with config patch for toolbox ? -> overwrite?
    # localizer_ds.install(
    #     source="https://github.com/psychoinformatics-de/hirni-demo",
    #     path="sourcedata",
    #     recursive=True)
    with patch.dict('os.environ',
                    {'DATALAD_HIRNI_TOOLBOX_URL': toolbox_url}):
        install_demo_dataset(localizer_ds, "sourcedata", recursive=True)

    assert_repo_status(localizer_ds.repo)
    subs = localizer_ds.subdatasets(recursive=True)
    assert_result_count(subs, 4)
    assert_result_count(subs, 1,
                        path=op.join(localizer_ds.path, 'sourcedata'))
    assert_result_count(subs, 1,
                        path=op.join(localizer_ds.path, 'sourcedata',
                                     'code', 'hirni-toolbox'))
    assert_result_count(subs, 1,
                        path=op.join(localizer_ds.path, 'sourcedata',
                                     'acq1', 'dicoms'))
    assert_result_count(subs, 1,
                        path=op.join(localizer_ds.path, 'sourcedata',
                                     'acq2', 'dicoms'))

    localizer_ds.hirni_spec2bids(
        [op.join(localizer_ds.path, 'sourcedata', 'studyspec.json')]
        + glob.glob(op.join(localizer_ds.path, 'sourcedata', '*',
                            'studyspec.json')),
        anonymize=True)

    for f in [
            'sub-001',
            'task-oneback_bold.json',
            'participants.tsv',
            op.join('sub-001', 'sub-001_scans.tsv'),
            op.join('sub-001', 'anat'),
            op.join('sub-001', 'anat', 'sub-001_run-1_T1w.json'),
            op.join('sub-001', 'anat', 'sub-001_run-1_T1w.nii.gz'),
            op.join('sub-001', 'func'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_bold.json'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_bold.nii.gz'),
            op.join('sub-001', 'func',
                    'sub-001_task-oneback_run-01_events.tsv'),
    ]:
        assert_true(op.lexists(op.join(localizer_ds.path, f)))

    analysis_ds = Dataset(ana_path).create()
    analysis_ds.install(source=localizer_ds.path,
                        path=op.join('inputs', 'rawdata'))

    analysis_ds.run_procedure('cfg_yoda')
    # download-url expects the target dir to exist
    (analysis_ds.pathobj / 'code').mkdir(exist_ok=True)
    analysis_ds.download_url(
        # TODO: File issue. A relative path via the Python API bound method
        # doesn't work, hence the absolute path plus trailing separator.
        path=op.join(analysis_ds.path, 'code') + op.sep,
        urls=['https://raw.githubusercontent.com/myyoda/ohbm2018-training/master/section23/scripts/events2ev3.sh',
              'https://raw.githubusercontent.com/myyoda/ohbm2018-training/master/section23/scripts/ffa_design.fsf'])
    assert_repo_status(analysis_ds.repo)
    ok_file_under_git(op.join(analysis_ds.path, 'code'), 'events2ev3.sh',
                      annexed=False)
    ok_file_under_git(op.join(analysis_ds.path, 'code'), 'ffa_design.fsf',
                      annexed=False)

    analysis_ds.run(
        inputs=[op.join('inputs', 'rawdata', 'sub-001', 'func',
                        'sub-001_task-oneback_run-01_events.tsv')],
        outputs=[op.join('sub-001', 'onsets')],
        cmd='bash code/events2ev3.sh sub-001 {inputs}',
        message="Build FSL EV3 design files")

    raise SkipTest("Solve datalad-containers #115")

    # unreachable until the skip above is resolved:
    analysis_ds.containers_add('fsl',
                               url="shub://ReproNim/ohbm2018-training:fsln")
    # % datalad containers-list
    analysis_ds.save(version_tag="ready4analysis")
    assert_repo_status(analysis_ds.repo)

    analysis_ds.run(
        outputs=[op.join('sub-001', '1stlvl_design.fsf')],
        cmd="bash -c 'sed -e \"s,##BASEPATH##,{pwd},g\" "
            "-e \"s,##SUB##,sub-001,g\" code/ffa_design.fsf > {outputs}'",
        message="FSL FEAT analysis config script")
    assert_repo_status(analysis_ds.repo)
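# The patch.dict() override above is what redirects hirni's toolbox
# subdataset to a local URL: DataLad translates DATALAD_HIRNI_TOOLBOX_URL
# into the datalad.hirni.toolbox.url config item. A self-contained sketch of
# just the environment mechanics (not part of the test suite; assumes the
# variable is not set otherwise):
def _sketch_toolbox_env_override():
    import os
    from unittest.mock import patch

    with patch.dict('os.environ',
                    {'DATALAD_HIRNI_TOOLBOX_URL': 'file:///tmp/toolbox'}):
        # visible (and picked up by config) only inside the block
        assert os.environ['DATALAD_HIRNI_TOOLBOX_URL'] == \
            'file:///tmp/toolbox'
    # patch.dict restores the original environment on exit
    assert 'DATALAD_HIRNI_TOOLBOX_URL' not in os.environ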
def test_read_access(store_path=None, store_url=None, ds_path=None):
    ds = Dataset(ds_path).create()
    populate_dataset(ds)

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    fsck_results = ds.repo.fsck(remote='ora-remote', fast=True)
    # Note: Failures in the special remote will show up as a success=False
    # result for fsck -> the call itself would not fail.
    for r in fsck_results:
        if "note" in r:
            # we could simply assert "note" to not be in r, but we want
            # proper error reporting - content of the note, not just its
            # unexpected existence
            assert_equal(r["success"], "true",
                         msg="git-annex-fsck failed with ORA over HTTP: %s"
                             % r)
            assert_equal(r["error-messages"], [])
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list',
                             result_renderer='disabled')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list',
                            result_renderer='disabled')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    # simulate a store that actually holds the content by copying the local
    # annex objects into the dataset's slot in the store
    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:]
                           / 'annex' / 'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 4)
    assert_result_count(res, 4, status='ok', type='file', action='get',
                        message="from ora-remote...")

    # check whether the reported access URL is the correct one
    one_url = ds.repo.whereis('one.txt',
                              output='full')[store_uuid]['urls'].pop()
    assert_status(
        'ok',
        ds.download_url(urls=[one_url], path=str(ds.pathobj / 'dummy')))
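# The id-based path construction above follows the RIA store layout: a
# dataset's repository lives at <store>/<id[:3]>/<id[3:]>, with its annex
# object tree below it. A minimal sketch (not part of the test suite; the
# dataset id and store root are hypothetical):
def _sketch_ria_store_layout():
    from pathlib import Path

    dsid = '8938de76-0302-11ea-9101-f0d5bf7b5561'  # hypothetical dataset id
    store = Path('/path/to/store')                 # hypothetical store root
    repo_dir = store / dsid[:3] / dsid[3:]
    return repo_dir / 'annex' / 'objects'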