def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])


def test_base(path):
    ds = create(path)
    ds.ukb_init('12345', ['20249_2_0', '20249_3_0', '20250_2_0'])
    # standard branch setup
    assert_true(all(
        b in sorted(ds.repo.get_branches())
        for b in ['git-annex', 'incoming', 'incoming-native', DEFAULT_BRANCH])
    )
    # standard batch file setup
    eq_(ds.repo.call_git(['cat-file', '-p', 'incoming:.ukbbatch']),
        '12345 20249_2_0\n12345 20249_3_0\n12345 20250_2_0\n')
    # intermediate branch is empty, apart from .gitattributes
    eq_([l for l in ds.repo.call_git(['ls-tree', 'incoming-native']).splitlines()
         if not l.strip().endswith('.gitattributes')],
        [])
    # no batch in master
    assert_not_in('ukbbatch', ds.repo.call_git(['ls-tree', DEFAULT_BRANCH]))
    # no re-init without force
    assert_status(
        'error',
        ds.ukb_init('12', ['12', '23'], on_failure='ignore'))
    ds.ukb_init('12345', ['20250_2_0'], force=True)
    eq_(ds.repo.call_git(['cat-file', '-p', 'incoming:.ukbbatch']),
        '12345 20250_2_0\n')


def test_add_archive_content_tar(repo_path):
    mode = 'full'
    special_remotes = [DATALAD_SPECIAL_REMOTE, ARCHIVES_SPECIAL_REMOTE]
    annex = Annexificator(path=repo_path,
                          allow_dirty=True,
                          mode=mode,
                          special_remotes=special_remotes,
                          largefiles="exclude=*.txt and exclude=SOMEOTHER")
    output_add = list(annex({'filename': '1.tar'}))  # adding it to annex
    assert_equal(output_add, [{'filename': '1.tar'}])

    if external_versions['cmd:annex'] >= '6.20170208':
        # should have fixed remotes
        from datalad.consts import DATALAD_SPECIAL_REMOTES_UUIDS
        for remote in special_remotes:
            eq_(annex.repo.get_description(
                    uuid=DATALAD_SPECIAL_REMOTES_UUIDS[remote]),
                '[%s]' % remote)

    #stats = ActivityStats()
    #output_add[0]['datalad_stats'] = ActivityStats()
    output_addarchive = list(
        annex.add_archive_content(
            existing='archive-suffix',
            delete=True,
            strip_leading_dirs=True,)(output_add[0]))
    assert_equal(output_addarchive,
                 [{'datalad_stats': ActivityStats(add_annex=1, add_git=1,
                                                  files=3, renamed=2),
                   'filename': '1.tar'}])
    assert_true(annex.repo.dirty)
    annex.repo.commit("added")
    ok_file_under_git(annex.repo.path, 'file.txt', annexed=False)
    ok_file_under_git(annex.repo.path, '1.dat', annexed=True)
    assert_false(lexists(opj(repo_path, '1.tar')))
    assert_false(annex.repo.dirty)


def test_subprocess_return_code_capture():

    class KillProtocol(WitlessProtocol):

        proc_out = True
        proc_err = True

        def __init__(self, signal_to_send: int, result_pool: dict):
            super().__init__()
            self.signal_to_send = signal_to_send
            self.result_pool = result_pool

        def connection_made(self, process):
            super().connection_made(process)
            process.send_signal(self.signal_to_send)

        def connection_lost(self, exc):
            self.result_pool["connection_lost_called"] = (True, exc)

        def process_exited(self):
            self.result_pool["process_exited_called"] = True

    signal_to_send = signal.SIGINT
    result_pool = dict()
    result = run_command(
        ["sleep", "10000"],
        KillProtocol,
        None,
        {
            "signal_to_send": signal_to_send,
            "result_pool": result_pool
        })
    eq_(result["code"], -signal_to_send)
    assert_true(result_pool["connection_lost_called"][0])
    assert_true(result_pool["process_exited_called"])


def test_external_versions_basic():
    ev = ExternalVersions()
    our_module = 'datalad'
    assert_equal(ev.versions, {})
    assert_equal(ev[our_module], __version__)
    # and it could be compared
    assert_greater_equal(ev[our_module], __version__)
    assert_greater(ev[our_module], '0.1')
    assert_equal(list(ev.keys()), [our_module])
    assert_true(our_module in ev)
    assert_false('unknown' in ev)

    # all are LooseVersions now
    assert_true(isinstance(ev[our_module], LooseVersion))
    version_str = __version__
    assert_equal(ev.dumps(), "Versions: %s=%s" % (our_module, version_str))

    # For non-existing one we get None
    assert_equal(ev['custom__nonexisting'], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev.versions.keys()), {our_module})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev['os'], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev.versions.get('os'), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert (ev['os'])
    # but not comparable with anything besides itself (was above)
    assert_raises(TypeError, cmp, ev['os'], '0')
    assert_raises(TypeError, assert_greater, ev['os'], '0')
    return


def decorated_test1(url):
    # we expect a file-scheme url to a cached version of `ds_url`
    expect_origin_path = cache_dir / name_in_cache
    assert_equal(expect_origin_path.as_uri(), url)
    origin = Dataset(expect_origin_path)
    assert_true(origin.is_installed())
    assert_false(origin.repo.file_has_content(str(annexed_file)))


def test_path_and_url(path, url):

    def _urlopen(url, auth=None):
        req = Request(url)
        if auth:
            req.add_header(
                "Authorization",
                b"Basic " + base64.standard_b64encode(
                    '{0}:{1}'.format(*auth).encode('utf-8')))
        return urlopen(req)

    # @serve_ should remove http_proxy from the os.environ if was present
    if not on_windows:
        assert_false('http_proxy' in os.environ)

    # get the "dir-view"
    dirurl = url + test_fpath.parent.as_posix()
    u = _urlopen(dirurl, auth)
    assert_true(u.getcode() == 200)
    html = u.read()
    # get the actual content
    file_html = _urlopen(
        url + url_quote(test_fpath.as_posix()), auth).read().decode()
    # verify we got the right one
    eq_(file_html, test_fpath_full.read_text())

    if bs4 is None:
        return

    # MIH is not sure what this part below is supposed to do
    # possibly some kind of internal consistency test
    soup = bs4.BeautifulSoup(html, "html.parser")
    href_links = [txt.get('href') for txt in soup.find_all('a')]
    assert_true(len(href_links) == 1)
    parsed_url = f"{dirurl}/{href_links[0]}"
    u = _urlopen(parsed_url, auth)
    html = u.read().decode()
    eq_(html, file_html)


def check_compress_file(ext, annex, path, name):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive, path=path)
    assert_true(op.exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest(exc_str(exc))
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')


def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)


def test_add_file_end_to_end(file_name):

    test_path = "d_0/d_0.0/f_0.0.0"

    json.dump({
        **metadata_template,
        "type": "file",
        "path": test_path
    }, open(file_name, "tw"))

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify file level metadata was added
        tree_version_list, uuid_set, mrr = _get_top_nodes(
            git_repo,
            UUID(metadata_template["dataset_id"]),
            metadata_template["dataset_version"])

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])


def test_ExtractedArchive(path):
    archive = op.join(path, fn_archive_obscure_ext)
    earchive = ExtractedArchive(archive)
    assert_false(op.exists(earchive.path))
    # no longer the case -- just using hash for now
    # assert_in(os.path.basename(archive), earchive.path)

    fpath = op.join(fn_archive_obscure,  # lead directory
                    fn_in_archive_obscure)
    extracted = earchive.get_extracted_filename(fpath)
    eq_(extracted, op.join(earchive.path, fpath))
    assert_false(op.exists(extracted))  # not yet

    extracted_ = earchive.get_extracted_file(fpath)
    eq_(extracted, extracted_)
    assert_true(op.exists(extracted))  # now it should

    extracted_files = earchive.get_extracted_files()
    ok_generator(extracted_files)
    eq_(sorted(extracted_files),
        sorted([
            # ['bbc/3.txt', 'bbc/abc']
            op.join(fn_archive_obscure, fn_in_archive_obscure),
            op.join(fn_archive_obscure, '3.txt')
        ]))

    earchive.clean()
    if not os.environ.get('DATALAD_TESTS_TEMP_KEEP'):
        assert_false(op.exists(earchive.path))


def test_ignore_nondatasets(path):
    # we want to ignore the version/commits for this test
    def _kill_time(meta):
        for m in meta:
            for k in ('version', 'shasum'):
                if k in m:
                    del m[k]
        return meta

    ds = Dataset(path).create()
    meta = _kill_time(ds.metadata(reporton='datasets', on_failure='ignore'))
    n_subm = 0
    # placing another repo in the dataset has no effect on metadata
    for cls, subpath in ((GitRepo, 'subm'), (AnnexRepo, 'annex_subm')):
        subm_path = opj(ds.path, subpath)
        r = cls(subm_path, create=True)
        with open(opj(subm_path, 'test'), 'w') as f:
            f.write('test')
        r.add('test')
        r.commit('some')
        assert_true(Dataset(subm_path).is_installed())
        assert_equal(
            meta,
            _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        # making it a submodule has no effect either
        ds.save(subpath)
        assert_equal(len(ds.subdatasets()), n_subm + 1)
        assert_equal(
            meta,
            _kill_time(ds.metadata(reporton='datasets', on_failure='ignore')))
        n_subm += 1


def _test_initremote_alias(host, ds_path, store):
    ds_path = Path(ds_path)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset with alias
    create_ds_in_store(io, store, ds.id, '2', '1', 'ali')
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()]
              )
    assert_repo_status(ds.path)
    assert_true(io.exists(store / "alias" / "ali"))


def test_bids(path):
    ds = create(path)
    ds.ukb_init(
        '12345', ['20249_2_0', '20249_3_0', '20250_2_0'], bids=True)
    # standard branch setup
    assert_true(all(
        b in sorted(ds.repo.get_branches())
        for b in ['git-annex', 'incoming', 'incoming-native', DEFAULT_BRANCH])
    )
    # intermediate branches are empty
    for b in 'incoming-bids', 'incoming-native':
        eq_([l for l in ds.repo.call_git(['ls-tree', b]).splitlines()
             if not l.strip().endswith('.gitattributes')],
            [])
    # no batch in master
    assert_not_in('ukbbatch', ds.repo.call_git(['ls-tree', DEFAULT_BRANCH]))
    # smoke test for a reinit
    ds.ukb_init('12345', ['20250_2_0'], bids=True, force=True)
    assert_true(all(
        b in sorted(ds.repo.get_branches())
        for b in ['git-annex', 'incoming', 'incoming-native', 'incoming-bids',
                  DEFAULT_BRANCH])
    )


def test_ExecutionTimeExternalsProtocol(path1, path2):

    timer_protocol = ExecutionTimeExternalsProtocol()
    runner = Runner(protocol=timer_protocol)

    # test external command:
    cmd = ['git', 'init']
    os.mkdir(path1)
    runner.run(cmd, cwd=path1)
    assert_equal(len(timer_protocol), 1, str(runner.protocol))
    assert_equal(cmd, timer_protocol[0]['command'])
    ok_(timer_protocol[0]['end'] >= timer_protocol[0]['start'])
    ok_(timer_protocol[0]['duration'] >= 0)
    assert_is(timer_protocol[0]['exception'], None)

    # now with exception, since path2 doesn't exist yet:
    try:
        with swallow_logs() as cml:
            runner.run(cmd, cwd=path2)
    except Exception as e:
        caught_exception = e
    finally:
        assert_equal(len(timer_protocol), 2)
        assert_equal(cmd, timer_protocol[1]['command'])
        ok_(timer_protocol[1]['end'] >= timer_protocol[1]['start'])
        ok_(timer_protocol[1]['duration'] >= 0)
        assert_is(timer_protocol[1]['exception'], caught_exception)

    # test callable (no entry added):
    new_runner = Runner(cwd=path2, protocol=timer_protocol)
    new_runner(os.mkdir, path2)
    assert_true(os.path.exists(path2))
    assert_equal(len(timer_protocol), 2)


def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} {{mysub}} {args}' % quote_cmdlinearg(sys.executable),
        where='dataset')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    # (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} local {args}' % quote_cmdlinearg(sys.executable),
        where='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')


def test_minimal(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.add('.')
    ok_clean_git(ds.path)
    # make sure essential metadata files are annexed for this test;
    # we want to drop them later and still do the conversion
    assert_true(
        ds.repo.is_under_annex(
            ['participants.tsv', 'dataset_description.json']))
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # do conversion
    # where output should appear by default
    target_path = opj(path, 'scidata_isatab_{}'.format(ds.repo.get_hexsha()))
    with chpwd(path):
        assert_status(
            'ok',
            ds.plugin(
                'bids2scidata',
                repo_name="dummy",
                repo_accession='ds1',
                repo_url='http://example.com',
            ))
    # just a few basic sanity tests that info ends up in the right places
    # a proper test should be a full regression test on a real dataset
    # with hand-validated exported metadata

    # investigator info
    invest = open(opj(target_path, 'i_Investigation.txt')).read()
    assert_in('Betty\tTom', invest)
    assert_in('Study Assay File Name\ta_mri_t1w.txt\ta_mri_bold.txt', invest)
    assert_in(
        'Comment[Data Repository]\tdummy\nComment[Data Record Accession]\tds1\nComment[Data Record URI]\thttp://example.com',
        invest)

    # study table
    assert_equal(
        """\
Source Name\tCharacteristics[organism]\tCharacteristics[organism part]\tProtocol REF\tSample Name\tCharacteristics[sex]\tCharacteristics[age at scan]\tCharacteristics[handedness]
01\thomo sapiens\tbrain\tParticipant recruitment\t01\tm\t30\tr
15\thomo sapiens\tbrain\tParticipant recruitment\t15\tf\t35\tl
""",
        open(opj(target_path, 's_study.txt')).read())

    # assay tables
    assert_equal(
        """\
Sample Name\tProtocol REF\tParameter Value[modality]\tAssay Name\tRaw Data File\tComment[Data Repository]\tComment[Data Record Accession]\tComment[Data Record URI]\tFactor Value[task]
sub-15\tMagnetic Resonance Imaging\tbold\tsub-15_task-nix_run-1\tsub-15/func/sub-15_task-nix_run-1_bold.nii.gz\tdummy\tds1\thttp://example.com\tnix
""",
        open(opj(target_path, 'a_mri_bold.txt')).read())

    assert_equal(
        """\
Sample Name\tProtocol REF\tParameter Value[modality]\tAssay Name\tRaw Data File\tComment[Data Repository]\tComment[Data Record Accession]\tComment[Data Record URI]
sub-01\tMagnetic Resonance Imaging\tT1w\tsub-01\tsub-01/anat/sub-01_T1w.nii.gz\tdummy\tds1\thttp://example.com
""",
        open(opj(target_path, 'a_mri_t1w.txt')).read())


def test_dataset_id(path):
    ds = Dataset(path)
    assert_equal(ds.id, None)
    ds.create()
    dsorigid = ds.id
    # ID is always a UUID
    assert_equal(ds.id.count('-'), 4)
    assert_equal(len(ds.id), 36)

    # creating a new object for the same path
    # yields the same ID
    # Note: Since we switched to singletons, a reset is required in order to
    # make sure we get a new object
    # TODO: Reconsider the actual intent of this assertion. Clearing the
    # flyweight dict isn't a nice approach. May be create needs a fix/RF?
    Dataset._unique_instances.clear()
    newds = Dataset(path)
    assert_false(ds is newds)
    assert_equal(ds.id, newds.id)

    # recreating the dataset does NOT change the id
    #
    # Note: Since we switched to singletons, a reset is required in order to
    # make sure we get a new object
    # TODO: Reconsider the actual intent of this assertion. Clearing the
    # flyweight dict isn't a nice approach. May be create needs a fix/RF?
    Dataset._unique_instances.clear()
    ds.create(annex=False, force=True)
    assert_equal(ds.id, dsorigid)

    # even adding an annex doesn't
    #
    # Note: Since we switched to singletons, a reset is required in order to
    # make sure we get a new object
    # TODO: Reconsider the actual intent of this assertion. Clearing the
    # flyweight dict isn't a nice approach. May be create needs a fix/RF?
    Dataset._unique_instances.clear()
    AnnexRepo._unique_instances.clear()
    ds.create(force=True)
    assert_equal(ds.id, dsorigid)

    # dataset ID and annex UUID have nothing to do with each other
    # if an ID was already generated
    assert_true(ds.repo.uuid != ds.id)

    # creating a new object for the same dataset with an ID on record
    # yields the same ID
    #
    # Note: Since we switched to singletons, a reset is required in order to
    # make sure we get a new object
    # TODO: Reconsider the actual intent of this assertion. Clearing the
    # flyweight dict isn't a nice approach. May be create needs a fix/RF?
    Dataset._unique_instances.clear()
    newds = Dataset(path)
    assert_false(ds is newds)
    assert_equal(ds.id, newds.id)

    # even if we generate a dataset from scratch with an annex UUID right
    # away, this is also not the ID
    annexds = Dataset(opj(path, 'scratch')).create()
    assert_true(annexds.id != annexds.repo.uuid)


def test_procedure_discovery():
    ps = run_procedure(discover=True)
    # there are a few procedures coming with datalad, needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and
            'procedure_callfmt' in p and
            'path' in p
            for p in ps
        ]),
        len(ps))


def test_external_versions_popular_packages():
    ev = ExternalVersions()

    for modname in ('scipy', 'numpy', 'mvpa2', 'sklearn', 'statsmodels',
                    'pandas', 'matplotlib', 'psychopy'):
        yield _test_external, ev, modname

    # more of a smoke test
    assert_false(linesep in ev.dumps())
    assert_true(ev.dumps(indent=True).endswith(linesep))


def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.add('.')
    with chpwd(path):
        ds.plugin('export_archive', filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.plugin('export_archive', filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)


def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs spec or discover
        assert_raises(InsufficientArgumentsError, run_procedure)
        res = run_procedure('unknown', on_failure='ignore')
        assert_true(len(res) == 1)
        assert_in_results(res, status="impossible")


def test_install_dataset_from_just_source_via_path(url, path):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls
    with chpwd(path, mkdir=True):
        ds = install(path=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
    assert_true(os.path.lexists(opj(ds.path, 'test-annex.dat')))


def test_external_versions_rogue_module(topd):
    ev = ExternalVersions()

    # if module throws some other non-ImportError exception upon import
    # we must not crash, but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
            swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)


def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)


def _get_metadata_content(metadata):

    assert_is_not_none(metadata)

    metadata_instances = tuple(metadata.extractor_runs())
    assert_true(len(metadata_instances) == 1)

    extractor_name, extractor_runs = metadata_instances[0]
    eq_(extractor_name, metadata_template["extractor_name"])

    instances = tuple(extractor_runs.get_instances())
    eq_(len(instances), 1)

    return instances[0].metadata_content


def test_discover_ds_trace(path, otherdir):
    ds = make_demo_hierarchy_datasets(
        path,
        {k: v for k, v in demo_hierarchy.items() if k in ['a', 'd']})
    a = opj(ds.path, 'a')
    aa = opj(a, 'aa')
    d = opj(ds.path, 'd')
    db = opj(d, 'db')
    # we have to check whether we get the correct hierarchy, as the test
    # subject is also involved in this
    assert_true(exists(opj(db, 'file_db')))
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # now two datasets which are not available locally, but we
    # know about them (e.g. from metadata)
    dba = opj(db, 'sub', 'dba')
    dbaa = opj(dba, 'subsub', 'dbaa')
    for input, eds, goal in (
            ([], None, {}),
            ([ds.path], None, {}),
            ([otherdir], None, {}),
            ([opj(ds.path, 'nothere')], None, {}),
            ([opj(d, 'nothere')], None, {}),
            ([opj(db, 'nothere')], None, {}),
            ([a], None,
             {ds.path: set([a])}),
            ([aa, a], None,
             {ds.path: set([a]), a: set([aa])}),
            ([db], None,
             {ds.path: set([d]), d: set([db])}),
            ([opj(db, 'file_db')], None,
             {ds.path: set([d]), d: set([db])}),
            # just a regular non-existing path
            ([dba], None, {}),
            # but if we inject this knowledge it must come back out
            # as the child of the closest existing dataset
            ([dba], [dba],
             {ds.path: set([d]), d: set([db]), db: set([dba])}),
            # regardless of the depth
            ([dbaa], [dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dbaa])}),
            ([dba, dbaa], [dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
            # we can simply add existing and non-existing datasets to the
            # include list and get the desired result
            ([d, dba, dbaa], [d, dba, dbaa],
             {ds.path: set([d]), d: set([db]), db: set([dba, dbaa])}),
    ):
        spec = {}
        discover_dataset_trace_to_targets(
            ds.path, input, [], spec, includeds=eds)
        assert_dict_equal(spec, goal)


def test_ephemeral(ds_path, store_path, clone_path):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()), "riastore")
    ds.publish(to="riastore", transfer_data="all")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id), clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(
        store / ds.id[:3] / ds.id[3:] / 'annex'))
    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet
        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True, file=file_test.as_posix())
        assert_result_count(res, 1, success=True, file=file_testsub.as_posix())

        # push back git history
        eph_clone.publish(to=DEFAULT_REMOTE, transfer_data="none")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")


def test_object_parameter():
    with \
            patch("datalad_metalad.add.add_file_metadata") as fp, \
            patch("datalad_metalad.add.add_dataset_metadata") as dp:

        meta_add(
            metadata={
                **metadata_template,
                "type": "file",
                "path": "d1/d1.1./f1.1.1"
            })

        assert_true(fp.call_count == 1)
        assert_true(dp.call_count == 0)


def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(source=ds2.path, path='subds2',
                         result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_in_results(
        ds1.create_sibling_github('bogus', recursive=True,
                                  github_login='******',
                                  on_failure='ignore'),
        status='error',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github('bogus', recursive=True,
                                  github_login='******',
                                  existing='reconfigure'),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip',
        ),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )


def test_zip_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True, no_annex=True)
    ds.save()
    with chpwd(path):
        ds.export_archive(filename='my', archivetype='zip')
        assert_true(os.path.exists('my.zip'))
        custom1_md5 = md5sum('my.zip')
        time.sleep(1.1)
        ds.export_archive(filename='my', archivetype='zip')
        assert_equal(md5sum('my.zip'), custom1_md5)

    # should be able to export without us cd'ing to that ds directory
    ds.export_archive(filename=ds.path, archivetype='zip')
    default_name = 'datalad_{}.zip'.format(ds.id)
    assert_true(os.path.exists(os.path.join(ds.path, default_name)))


def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            assert_equal(cred.get('key_id'), '1')
            assert_equal(cred.get('secret_id'), '2')
            assert_true(cred.is_known)
    assert_false(cred.is_known)  # no memory of the past


def test_tarball(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save(all_changes=True)
    committed_date = ds.repo.get_committed_date()
    with chpwd(path):
        _mod, tarball1 = ds.export('tarball')
        assert(not isabs(tarball1))
        tarball1 = opj(path, tarball1)
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    assert_equal(tarball1, default_outname)
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export('tarball', output=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original tarball filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export('tarball', output=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly three files, and expect no content for any
            # directory
            assert_equal(nfiles, 3)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')


def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)

    # Test it blowing up if we provide unknown field
    with assert_raises(ValueError) as cme:
        cred.enter_new(username='******')
    assert_in('field(s): username. Known but not specified: password, user',
              str(cme.exception))

    # Test that if user is provided, it is not asked
    cred.enter_new(user='******')
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'newpassword')


def test_archive(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep so if they stop using time.time - we know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much we generate a meta info
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', check=False)
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))


def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # there are a few procedures coming with datalad, needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(
        ps, path=op.join(super.path, 'sub', 'code', 'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at procedure location, but we can't tell, whether it
        # is an actual procedure without any guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))
        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'broken_link_proc.py'),
            state='absent')
        assert_not_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))


def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" {{mysub}} {args}',
        where='dataset'
    )
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset'
    )
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    # (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" local {args}',
        where='local'
    )
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset'
    )

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')


def test_is_legal_metafield():
    for legal in ["legal", "0", "legal_"]:
        assert_true(au.is_legal_metafield(legal))
    for notlegal in ["_not", "with space"]:
        assert_false(au.is_legal_metafield(notlegal))


def test_invalid_call():
    # needs spec or discover
    assert_raises(InsufficientArgumentsError, run_procedure)
    res = run_procedure('unknown', on_failure='ignore')
    assert_true(len(res) == 1)
    assert_in_results(res, status="impossible")


def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_false(ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.get_subdatasets(recursive=True, absolute=True, fulfilled=True),
        [subsubds.path, subds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_equal(ds.save(all_changes=True), [ds])

    # make the new file known to its dataset
    # with #1141 this would be
    #ds.add(newfile_name, save=False)
    subsubds.add(newfile_name, save=False)

    # but remains dirty because of the untracked file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_equal(ds.save(all_changes=True), [])
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_equal(ds.save(all_changes=True, recursive=True),
                 [subsubds, subds, ds])

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')

    # the following should all do nothing
    # no auto_add
    assert_false(ds.save())
    # no recursive
    assert_false(ds.save(all_changes=True))
    # an explicit target saves only the corresponding dataset
    assert_equal(save(files=[testfname]), [subsubds])
    # plain recursive without any files given will save the beast
    assert_equal(ds.save(recursive=True), [subds, ds])
    # there is nothing else to save
    assert_false(ds.save(all_changes=True, recursive=True))
    # one more time and check that all datasets in the hierarchy get updated
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_true(ds.save(all_changes=True, recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_not_equal(old, new)

    # now let's check saving "upwards"
    assert not subds.repo.dirty
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty

    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])

    subsubds.save(message="savingtestmessage", super_datasets=True,
                  all_changes=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')