def test_get_most_obscure_supported_name():
    n = get_most_obscure_supported_name()
    ok_startswith(n, OBSCURE_PREFIX)
    ok_(len(OBSCURE_FILENAMES) > 1)
    # from more complex to simpler ones
    ok_(len(OBSCURE_FILENAMES[0]) > len(OBSCURE_FILENAMES[-1]))
    print(repr(n))

def test_help_np():
    stdout, stderr = run_main(['--help-np'])

    # Let's extract section titles:
    # enough of bin/datalad and .tox/py27/bin/datalad -- guarantee consistency! ;)
    ok_startswith(stdout, 'Usage: datalad')
    # Sections start/end with * if ran under DATALAD_HELP2MAN mode
    sections = [l[1:-1] for l in filter(re.compile(r'^\*.*\*$').match,
                                        stdout.split('\n'))]
    # but order is still not guaranteed (dict somewhere)! TODO
    # see https://travis-ci.org/datalad/datalad/jobs/80519004
    # thus testing sets
    for s in {'Commands for dataset operations',
              'Commands for metadata handling',
              'Miscellaneous commands',
              'General information',
              'Global options',
              'Plumbing commands',
              'Plugins'}:
        assert_in(s, sections)

    # none of the lines must be longer than 80 chars
    # TODO: decide on create-sibling and possibly rewrite-urls
    accepted_width = get_console_width()

    long_lines = ["%d %s" % (len(l), l) for l in stdout.split('\n')
                  if len(l) > accepted_width and
                  '{' not in l  # on nd70 summary line is unsplit
                  ]
    if long_lines:
        raise AssertionError(
            "Following lines in --help output were longer than %s chars:\n%s"
            % (accepted_width, '\n'.join(long_lines))
        )

def test_install_plain_git(src, path):
    # make plain git repo
    gr = GitRepo(src, create=True)
    gr.git_add('test.txt')
    gr.git_commit('demo')
    # now install it somewhere else
    ds = install(path=path, source=src)
    # stays plain Git repo
    ok_(isinstance(ds.repo, GitRepo))
    # now go back to original
    ds = Dataset(src)
    ok_(isinstance(ds.repo, GitRepo))
    # installing a file must fail, as we decided not to perform magical upgrades
    # GitRepo -> AnnexRepo
    assert_raises(RuntimeError, ds.install, path='test2.txt',
                  source=opj(src, 'test2.txt'))
    # but works when forced
    ifiles = ds.install(path='test2.txt', source=opj(src, 'test2.txt'),
                        add_data_to_git=True)
    ok_startswith(ifiles, ds.path)
    ok_(ifiles.endswith('test2.txt'))
    ok_('test2.txt' in ds.repo.get_indexed_files())

def test_with_tempfile_dir_via_env_variable():
    target = os.path.join(os.path.expanduser("~"), "dataladtesttmpdir")
    assert_false(os.path.exists(target),
                 "directory %s already exists." % target)
    with patch.dict('os.environ', {'DATALAD_TESTS_TEMP_DIR': target}):
        filename = _with_tempfile_decorated_dummy()
        ok_startswith(filename, target)

def test_with_tempfile_dir_via_env_variable():
    target = os.path.join(os.path.expanduser("~"), "dataladtesttmpdir")
    assert_false(os.path.exists(target),
                 "directory %s already exists." % target)
    with patch_config({'datalad.tests.temp.dir': target}):
        filename = _with_tempfile_decorated_dummy()
        ok_startswith(filename, target)

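# For reference, a minimal sketch of the helper exercised by the two variants
# above. This is an assumption about its shape (the real helper lives in
# datalad's test utilities): it simply returns the temp path that
# @with_tempfile hands it, so the tests can inspect where the file landed.
@with_tempfile
def _with_tempfile_decorated_dummy(path):
    return path
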
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)

def test_ok_startswith():
    ok_startswith('abc', 'abc')
    ok_startswith('abc', 'a')
    ok_startswith('abc', '')
    ok_startswith(' abc', ' ')
    ok_startswith('abc\r\n', 'a')  # no effect from \r\n etc
    assert_raises(AssertionError, ok_startswith, 'abc', 'b')
    assert_raises(AssertionError, ok_startswith, 'abc', 'abcd')

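# Hedged sketch of the helpers exercised above (assumed shapes; the real
# implementations live in datalad's test utilities): ok_startswith asserts
# that a string begins with the given prefix, and nok_startswith asserts the
# opposite, matching the AssertionError expectations in test_ok_startswith.
def ok_startswith_sketch(s, prefix):
    assert s.startswith(prefix), \
        "%r does not start with %r" % (s, prefix)


def nok_startswith_sketch(s, prefix):
    assert not s.startswith(prefix), \
        "%r unexpectedly starts with %r" % (s, prefix)
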
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())

def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())

def test_get_metadata():
    all_meta = get_metadata()
    assert len(all_meta) > 50  # so we have for all datasets
    # and each one of them should be a string (xml)
    assert all(x.startswith('<?xml') for x in all_meta.values())
    # but we could request one specific one
    aa1_meta = get_metadata('aa-1')
    ok_startswith(aa1_meta, '<?xml')

def test_install_dataset_from_instance(src, dst):
    origin = Dataset(src)
    clone = install(source=origin, path=dst)
    assert_is_instance(clone, Dataset)
    ok_startswith(clone.path, dst)
    ok_(clone.is_installed())
    ok_(GitRepo.is_valid_repo(clone.path))
    ok_clean_git(clone.path, annex=None)
    assert_in('INFO.txt', clone.repo.get_indexed_files())

def test_version():
    stdout, stderr = run_main(['--version'], expect_stderr=True)

    # and output should contain our version, copyright, license
    # https://hg.python.org/cpython/file/default/Doc/whatsnew/3.4.rst#l1952
    out = stdout if sys.version_info >= (3, 4) else stderr
    ok_startswith(out, 'datalad %s\n' % datalad.__version__)
    in_("Copyright", out)
    in_("Permission is hereby granted", out)

def test_with_tempfile_default_prefix(d1):
    d = basename(d1)
    short = 'datalad_temp_'
    full = short + 'test_with_tempfile_default_prefix'
    if on_windows:
        ok_startswith(d, short)
        nok_startswith(d, full)
    else:
        ok_startswith(d, full)

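# A minimal sketch (an assumption, not datalad's actual implementation) of how
# @with_tempfile could derive the default prefix checked above: the fixed
# 'datalad_temp_' part plus the decorated function's name, with the long
# suffix dropped on Windows to stay within path-length limits.
import os
import tempfile
from functools import wraps


def with_tempfile_sketch(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        prefix = 'datalad_temp_'
        if not on_windows:  # same flag the tests above rely on
            prefix += func.__name__
        fd, filename = tempfile.mkstemp(prefix=prefix)
        os.close(fd)
        try:
            # hand the generated path to the decorated test
            return func(*(args + (filename,)), **kwargs)
        finally:
            if os.path.lexists(filename):
                os.unlink(filename)
    return wrapper
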
def test_version():
    stdout, stderr = run_main(['--version'], expect_stderr=True)

    # and output should contain our version
    # https://hg.python.org/cpython/file/default/Doc/whatsnew/3.4.rst#l1952
    out = stdout if sys.version_info >= (3, 4) else stderr
    ok_startswith(out, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", out)
    assert_not_in("Permission is hereby granted", out)

def test_install_dataset_from_just_source_via_path(url, path):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls
    with chpwd(path, mkdir=True):
        ds = install(path=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
    assert_true(os.path.lexists(opj(ds.path, 'test-annex.dat')))

def test_get_versioned_url():
    get_test_providers('s3://openfmri/tarballs')  # to verify having credentials to access openfmri via S3
    for url_pref in ('http://openfmri.s3.amazonaws.com',
                     'https://s3.amazonaws.com/openfmri'):
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz?param=1"),
            url_pref + "/tarballs/ds001_raw.tgz?param=1&versionId=null")
        # We don't duplicate the version if it already exists.
        eq_(get_versioned_url(url_pref + "/tarballs/ds001_raw.tgz?versionId=null"),
            url_pref + "/tarballs/ds001_raw.tgz?versionId=null")

    # something is wrong there
    #print(get_versioned_url("http://openfmri.s3.amazonaws.com/ds001/demographics.txt"))

    eq_(get_versioned_url("someurl"), "someurl")  # should just return original one
    assert_raises(RuntimeError, get_versioned_url, "someurl",
                  guarantee_versioned=True)

    # TODO: on a bucket without versioning
    url = "http://datalad-test0-nonversioned.s3.amazonaws.com/2versions-removed-recreated.txt"
    eq_(get_versioned_url(url), url)
    eq_(get_versioned_url(url, return_all=True), [url])

    assert_raises(NotImplementedError, get_versioned_url, "s3://buga")

    urls = get_versioned_url(
        "http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt",
        return_all=True,
        verify=True)
    eq_(len(set(urls)), len(urls))  # all unique
    for url in urls:
        # so we didn't grab other files along with the same prefix
        ok_startswith(
            url,
            'http://datalad-test0-versioned.s3.amazonaws.com/2versions-removed-recreated.txt?versionId='
        )

    # Update a versioned URL with a newer version tag.
    url_3ver = "http://datalad-test0-versioned.s3.amazonaws.com/3versions-allversioned.txt"
    url_3ver_input = url_3ver + "?versionId=b.qCuh7Sg58VIYj8TVHzbRS97EvejzEl"
    eq_(get_versioned_url(url_3ver_input), url_3ver_input)
    eq_(get_versioned_url(url_3ver_input, update=True),
        url_3ver + "?versionId=Kvuind11HZh._dCPaDAb0OY9dRrQoTMn")

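# Minimal sketch (a hypothetical helper, not datalad's implementation) of the
# query-string handling the assertions above rely on: append versionId with
# '?' when the URL has no query yet, with '&' when it already has one, and
# leave the URL alone if a versionId is already present.
def _append_version_sketch(url, version_id):
    if 'versionId=' in url:
        return url
    sep = '&' if '?' in url else '?'
    return url + sep + 'versionId=' + version_id


# e.g. _append_version_sketch("x.tgz?param=1", "null") -> "x.tgz?param=1&versionId=null"
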
def test_cmdline_example_to_rst():
    # don't puke on nothing
    out = fmt.cmdline_example_to_rst(SIO(''))
    out.seek(0)
    ok_startswith(out.read(), '.. AUTO-GENERATED')
    out = fmt.cmdline_example_to_rst(SIO(''), ref='dummy')
    out.seek(0)
    assert_in('.. dummy:', out.read())
    # full scale test
    out = fmt.cmdline_example_to_rst(SIO(demo_example), ref='mydemo')
    out.seek(0)
    assert_in('.. code-block:: sh', out.read())

def test_install_dataset_from_just_source_via_path(url, path):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls
    # Q (ben): remote urls only? Sure? => TODO
    with chpwd(path, mkdir=True):
        ds = install(url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())

def check_contents(outname, prefix):
    with tarfile.open(outname) as tf:
        nfiles = 0
        for ti in tf:
            # any annex links resolved
            assert_false(ti.issym())
            ok_startswith(ti.name, prefix + '/')
            assert_equal(ti.mtime, committed_date)
            if '.datalad' not in ti.name:
                # ignore any files in .datalad for this test to not be
                # susceptible to changes in how much we generate a meta info
                nfiles += 1
        # we have exactly three files, and expect no content for any directory
        assert_equal(nfiles, 3)

def test_get_metadata():
    try:
        all_meta = get_metadata()
        # something broke somewhere and ATM returns no hits
        # Reported to CRCNS folks
        if len(all_meta) < 2:
            raise SkipTest("Known to fail: wait for life to become better")
        assert len(all_meta) > 50  # so we have for all datasets
        # and each one of them should be a string (xml)
        assert all(x.startswith('<?xml') for x in all_meta.values())
        # but we could request one specific one
        aa1_meta = get_metadata('aa-1')
        ok_startswith(aa1_meta, '<?xml')
    except AccessFailedError as e:
        if str(e).startswith('Access to https://search.datacite.org') and \
                str(e).endswith('has failed: status code 502'):
            raise SkipTest("Probably datacite.org blocked us once again")
        raise  # do not swallow failures we did not account for

def test_version():
    # we just get a version if not asking for a version of some command
    stdout, stderr = run_main(['--version'], expect_stderr=True)
    eq_(stdout.rstrip(), "datalad %s" % datalad.__version__)

    stdout, stderr = run_main(['clone', '--version'], expect_stderr=True)
    ok_startswith(stdout, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", stdout)
    assert_not_in("Permission is hereby granted", stdout)

    try:
        import datalad_container
    except ImportError:
        pass  # not installed, cannot test with extension
    else:
        stdout, stderr = run_main(['containers-list', '--version'],
                                  expect_stderr=True)
        eq_(stdout, 'datalad_container %s\n' % datalad_container.__version__)

def check_basic_xnat_interface(url, project, empty_project, subjects):
    nitrc = XNATServer(url)
    projects = nitrc.get_projects()
    # verify that we still have projects we want!
    assert_in(project, projects)
    if empty_project:
        all_projects = nitrc.get_projects(drop_empty=False)
        assert len(all_projects) > len(projects)
        assert empty_project in all_projects
        assert empty_project not in projects

    projects_public = nitrc.get_projects(limit='public')
    import json
    print(json.dumps(projects_public, indent=2))
    assert len(projects_public) <= len(projects)
    assert not set(projects_public).difference(projects)
    eq_(set(projects), set(nitrc.get_projects(limit=PROJECT_ACCESS_TYPES)))

    subjects_ = nitrc.get_subjects(project)
    assert len(subjects_)
    experiments = nitrc.get_experiments(project, subjects[0])
    # NOTE: assumption that there is only one experiment
    # (list(...) instead of .keys()[0] so it works under Python 3)
    files1 = nitrc.get_files(project, subjects[0], list(experiments)[0])
    assert files1
    experiments = nitrc.get_experiments(project, subjects[1])
    files2 = nitrc.get_files(project, subjects[1], list(experiments)[0])
    assert files2
    ok_startswith(files1[0]['uri'], '/data')

    gen = nitrc.get_all_files_for_project(project,
                                          subjects=subjects,
                                          experiments=[list(experiments)[0]])
    assert_is_generator(gen)
    all_files = list(gen)
    if len(experiments) == 1:
        eq_(len(all_files), len(files1) + len(files2))
    else:
        # there should be more files due to multiple experiments
        # which we didn't actually check
        assert len(all_files) > len(files1) + len(files2)

def test_add_readme(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    assert_status('ok', ds.add_readme())
    # should use default name
    content = open(opj(path, 'README.md')).read()
    ok_startswith(
        content,
        """\
# Dataset "demo_ds"

this is for play

### Authors

- Betty
- Tom

### License

PDDL

## General information

This is a DataLad dataset (id: {id}).
""".format(id=ds.id))

    # make sure that central README references are present
    assert_in(
        """More information on how to install DataLad and [how to install](http://handbook.datalad.org/en/latest/intro/installation.html) it can be found in the [DataLad Handbook](https://handbook.datalad.org/en/latest/index.html).
""",
        content)

    # no unexpectedly long lines
    assert all([len(l) < 160 for l in content.splitlines()])

    # should skip on re-run
    assert_status('notneeded', ds.add_readme())

def test_usage_on_insufficient_args():
    stdout, stderr = run_main(['install'], exit_code=2, expect_stderr=True)
    ok_startswith(stderr, 'usage:')

def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)  # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))  # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))  # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))  # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')  # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive
                )
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and deleted
                # only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if it's updated in its nodes sublist too.
                # used by web-ui json. regression test
                assert_equal(dsj['nodes'][0]['size']['total'],
                             dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes']
                         if item['name'] == 'subdsfile.txt'][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata
                # already created to verify sub-dataset metadata being picked up
                # from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [item for item in dsj['nodes']
                             if item['name'] == 'subdsfile.txt'][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE
                )

def test_get_versioned_url_anon():
    # The one without any authenticator was crashing.
    # Also it triggered another bug about having . in the bucket name
    url_on = "http://openneuro.org.s3.amazonaws.com/ds000001/dataset_description.json"
    url_on_versioned = get_versioned_url(url_on)
    ok_startswith(url_on_versioned, url_on + "?versionId=")

def test_run_datalad_help():
    out, err = check_run_and_get_output("datalad --help")
    ok_startswith(out, "Usage: ")
    eq_(err, "")

def test_wtf(path):
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path)
        assert_not_in('## dataset', cmo.out)
        assert_in('## configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('## dataset', cmo.out)
            assert_in('## configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('## configuration', cmo.out)
        assert_in('## dataset', cmo.out)
        assert_in('path: {}'.format(ds.path), cmo.out)

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyways, but we do show cfg in this mode
            # and explicitly not showing them
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    # Sections selection
    #
    # If we ask for no sections and there is no dataset
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            assert_not_in('## dataset', cmo.out)
            for s in SECTION_CALLABLES:
                assert_not_in('## %s' % s.lower(), cmo.out.lower())

    # ask for a selected set
    secs = ['git-annex', 'configuration']
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=secs)
            for s in SECTION_CALLABLES:
                (assert_in if s in secs else assert_not_in)(
                    '## %s' % s.lower(), cmo.out.lower()
                )
            # order should match our desired one, not alphabetical
            assert cmo.out.index('## git-annex') < cmo.out.index('## configuration')

    # not achievable from cmdline is to pass an empty list of sections.
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf(sections=[])
            # should result only in '# WTF'
            eq_(cmo.out.rstrip(), '# WTF')

    # and we could decorate it nicely for embedding e.g. into github issues
    with swallow_outputs() as cmo:
        wtf(sections=['dependencies'], decor='html_details')
        ok_startswith(cmo.out, '<details><summary>DataLad %s WTF' % __version__)
        assert_in('## dependencies', cmo.out)

    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(exc_str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex:", pyperclip.paste())  # but the content is there

def test_with_tempfile_specified_prefix(d1):
    ok_startswith(basename(d1), 'nodatalad_')
    ok_('test_with_tempfile_specified_prefix' not in d1)

def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return len(ds.repo.get_revisions("git-annex"))

    n_annex_commits = get_annex_commit_counts()

    # Meanwhile also test that we can specify path relative
    # to the top of the dataset, as we generally treat paths in
    # Python API, and it will be the one saved in commit
    # message record
    json_file = op.relpath(self.json_file, ds.path)

    ds.addurls(json_file, "{url}", "{name}",
               exclude_autometa="(md5sum|size)")
    ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                  f"url_file='{json_file}'")

    filenames = ["a", "b", "c"]
    for fname in filenames:
        ok_exists(op.join(ds.path, fname))

    for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                     ["foo", "bar", "foo"]):
        assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

    # Ignore this check if we're faking dates because that disables
    # batch mode.
    # Also ignore if on Windows as it seems as if a git-annex bug
    # leads to separate meta data commits:
    # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
    if not (dl_cfg.get('datalad.fake-dates') or on_windows):
        # We should have two new commits on the git-annex: one for the
        # added urls and one for the added metadata.
        eq_(n_annex_commits + 2, get_annex_commit_counts())

    # Add to already existing links, overwriting.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{name}",
                   ifexists="overwrite")
        for fname in filenames:
            assert_in("Removing {}".format(os.path.join(path, fname)),
                      cml.out)

    # Add to already existing links, skipping.
    assert_in_results(
        ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
        action="addurls",
        status="notneeded")

    # Add to already existing links works, as long as content is the same.
    ds.addurls(self.json_file, "{url}", "{name}")

    # But it fails if something has changed.
    ds.unlock("a")
    with open(op.join(ds.path, "a"), "w") as ofh:
        ofh.write("changed")
    ds.save("a")

    assert_raises(IncompleteResultsError,
                  ds.addurls,
                  self.json_file, "{url}", "{name}")

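# Hedged sketch of the "{url}"/"{name}" placeholder expansion that
# test_addurls exercises (an illustration, not datalad's implementation):
# each record from the url file fills the format templates to produce the
# download URL and the target file name.
def expand_sketch(record, url_format="{url}", filename_format="{name}"):
    return url_format.format(**record), filename_format.format(**record)


# expand_sketch({"url": "http://example.com/a.dat", "name": "a"})
# -> ('http://example.com/a.dat', 'a')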