def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())

def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        for skipped in [opj(ds.path, 'not_existing'), path_outside]:
            cml.assert_logged(msg="ignored non-existing paths: {}\n".format(
                [opj(ds.path, 'not_existing'), path_outside]),
                regex=False, level='WARNING')
    ok_(isinstance(result, list))
    eq_(len(result), 2)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
        assert_in(sub, result)
        ok_(sub.is_installed())

    # return of get is always a list, even if just one thing was gotten
    # in this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')

def test_install_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])

def test_ssh_open_close(tfile1):
    manager = SSHManager()
    path = opj(manager.socket_dir, get_connection_hash('localhost'))
    # TODO: facilitate the test when it didn't exist
    existed_before = exists(path)
    print("%s existed: %s" % (path, existed_before))

    c1 = manager.get_connection('ssh://localhost')
    c1.open()
    # control master exists for sure now
    ok_(exists(path))

    # use connection to execute remote command:
    local_home = os.path.expanduser('~')
    # we list explicitly local HOME since we override it in module_setup
    out, err = c1('ls -a %r' % local_home)
    remote_ls = [entry for entry in out.splitlines()
                 if entry != '.' and entry != '..']
    local_ls = os.listdir(local_home)
    eq_(set(remote_ls), set(local_ls))

    # now test for arguments containing spaces and other pleasant symbols
    out, err = c1('ls -l {}'.format(sh_quote(tfile1)))
    assert_in(tfile1, out)
    eq_(err, '')

    c1.close()
    # control master doesn't exist anymore:
    ok_(exists(path) == existed_before)

def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as log level atm while
            # swallowing, let's check whether an exit code was reported or not
            # -- test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))

def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo is not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because its content is still untracked) the subrepo
    # cannot be added (it has no commit)
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch: #datalad/3178 (that would have a commit)
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
    assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
              ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())

def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost",
                                        identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)

def test_add_mimetypes(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding file to annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
            # Empty one considered to be application/octet-stream,
            # i.e. non-text
            ('empty', True),
            ('file.txt', False),
            ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)

def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())

def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))

def test_help_np():
    stdout, stderr = run_main(['--help-np'])

    # Let's extract section titles:
    # enough of bin/datalad and .tox/py27/bin/datalad -- guarantee consistency! ;)
    ok_startswith(stdout, 'Usage: datalad')
    # Sections start/end with * if ran under DATALAD_HELP2MAN mode
    sections = [l[1:-1] for l in filter(re.compile(r'^\*.*\*$').match,
                                        stdout.split('\n'))]
    # but order is still not guaranteed (dict somewhere)! TODO
    # see https://travis-ci.org/datalad/datalad/jobs/80519004
    # thus testing sets
    for s in {'Commands for dataset operations',
              'Commands for metadata handling',
              'Miscellaneous commands',
              'General information',
              'Global options',
              'Plumbing commands',
              'Plugins'}:
        assert_in(s, sections)

    # none of the lines must be longer than 80 chars
    # TODO: decide on create-sibling and possibly
    # rewrite-urls
    accepted_width = get_console_width()

    long_lines = ["%d %s" % (len(l), l) for l in stdout.split('\n')
                  if len(l) > accepted_width
                  and '{' not in l  # on nd70 summary line is unsplit
                  ]
    if long_lines:
        raise AssertionError(
            "Following lines in --help output were longer than %s chars:\n%s"
            % (accepted_width, '\n'.join(long_lines))
        )

def test_publish_file_handle(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get',
                                   annex_options=['test-annex.dat',
                                                  '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])

def test_our_metadataset_search(tdir):
    # TODO re-enable when a dataset with new aggregated metadata is
    # available at some public location
    raise SkipTest
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    #ds = install(
    #    path=tdir,
    #    # TODO re-enable test when /// metadata actually conforms to the new metadata
    #    #source="///",
    #    source="smaug:/mnt/btrfs/datasets-meta6-4/datalad/crawl",
    #    result_xfm='datasets', return_type='item-or-list')
    assert list(ds.search('haxby'))
    assert_result_count(
        ds.search('id:873a6eae-7ae6-11e6-a6c8-002590f97d84', mode='textblob'),
        1,
        type='dataset',
        path=opj(ds.path, 'crcns', 'pfc-2'))

    # there is a problem with argparse not decoding into utf8 in PY2
    # TODO: make it into an independent lean test
    from datalad.cmdline.tests.test_main import run_main
    from datalad.cmd import Runner
    out, err = Runner(cwd=ds.path)('datalad search Buzsáki')
    assert_in('crcns/pfc-2 ', out)  # has it in description
    # and then another aspect: this entry is among multiple authors, need to
    # check if aggregating them into a searchable entity was done correctly
    assert_in('crcns/hc-1 ', out)

def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out a few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples

    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))

def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)

def test_globbedpaths(path):
    dotdir = op.curdir + op.sep
    for patterns, expected in [
            (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
            ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
            (["*.txt", "*.dat"],
             {"1.txt", "2.dat", u"bβ.dat", "3.txt"}),
            ([dotdir + "*.txt", "*.dat"],
             {dotdir + "1.txt", "2.dat", u"bβ.dat", dotdir + "3.txt"}),
            (["subdir/*.txt"],
             {"subdir/1.txt", "subdir/2.txt"}),
            ([dotdir + "subdir/*.txt"],
             {dotdir + p for p in ["subdir/1.txt", "subdir/2.txt"]}),
            (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
            (["*.txt"], {"1.txt", "2.txt"}),
            ([dotdir + "*.txt"], {dotdir + p for p in ["1.txt", "2.txt"]}),
            ([pardir + "*.txt"], {pardir + p for p in ["1.txt", "3.txt"]}),
            ([dotdir + pardir + "*.txt"],
             {dotdir + pardir + p for p in ["1.txt", "3.txt"]}),
            (["subdir/"], {"subdir/"})]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", u"bβ.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", u"bβ.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat", u"bβ.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # We sort the glob outputs.
    glob_results = {"z": "z",
                    "a": ["x", "d", "b"]}
    with patch('glob.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat", u"bβ.dat"]),
                             (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)

def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])

def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)

def test_unlock_raises(path, path2, path3):
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    with swallow_logs(new_level=logging.WARNING) as cml:
        unlock(dataset=None, path=path2)
        assert_in("ignored paths that do not belong to any dataset: ['{0}'".format(path2),
                  cml.out)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.unlock(path="notexistent.txt")
        assert_in("ignored non-existing paths", cml.out)

    chpwd(_cwd)

def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')

    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__
    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')

def test_rstmanpage_formatter():
    parsers = setup_parser(return_subparsers=True)
    for p in parsers:
        mp = fmt.RSTManPageFormatter(p).format_man_page(parsers[p])
        for section in ('Synopsis', 'Description', 'Options'):
            assert_in('\n{0}'.format(section), mp)
        assert_in('{0}\n{1}'.format(p, '=' * len(p)), mp)

def test_search_non_dataset(tdir):
    from datalad.support.gitrepo import GitRepo
    GitRepo(tdir, create=True)
    with assert_raises(NoDatasetArgumentFound) as cme:
        list(search('smth', dataset=tdir))
    # Should instruct user how that repo could become a datalad dataset
    assert_in("datalad create --force", str(cme.exception))

def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')

        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i for i in subds.repo.file_has_content(annexed_files)]))

        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds.path, result_xfm='datasets')
        eq_(res[0], subds)

        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))

def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible',
                  publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')

    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')

def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())

def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'sub1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())

def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())

def test_GitRepo_fetch(test_path, orig_path, clone_path):
    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without an URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)

def test_GitRepo_add(src, path):
    gr = GitRepo.clone(src, path)
    filename = get_most_obscure_supported_name()

    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")
    added = gr.add(filename)

    eq_(added, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    # uncommitted:
    ok_(gr.dirty)

    filename = "another.txt"
    with open(op.join(path, filename), 'w') as f:
        f.write("Another file to add to git")

    # include committing:
    added2 = gr.add(filename)
    gr.commit(msg="Add two files.")
    eq_(added2, {'success': True, 'file': filename})

    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))

    ok_clean_git(path)

def test_wtf(path):
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path)
        assert_not_in('Dataset information', cmo.out)
        assert_in('Configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('Dataset information', cmo.out)
            assert_in('Configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('Configuration', cmo.out)
        assert_in('Dataset information', cmo.out)
        assert_in('path: {}'.format(ds.path), cmo.out)

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyways, but we do show cfg in this mode
            # and explicitly not showing them
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(exc_str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex=", pyperclip.paste())  # but the content is there

def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, annex=False, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))

    obscure_ds = u"ds-" + OBSCURE_FILENAME
    # create a sub:
    ds.create(obscure_ds)
    # fail when doing it again
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 str(ds.pathobj / obscure_ds),
                 ds.path)
    )

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall(obscure_ds, check=False)
    assert_in(obscure_ds, ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 str(ds.pathobj / obscure_ds),
                 ds.path)
    )
    assert_in_results(
        ds.create(op.join(obscure_ds, 'subsub'), **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 str(ds.pathobj / obscure_ds),
                 ds.path)
    )

    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.save()
    assert_in_results(
        ds.create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path,
                 [str(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )

def test_status(_path, linkpath):
    # do the setup on the real path, not the symlink, to have its
    # bugs not affect this test of status()
    ds = get_deeply_nested_structure(str(_path))
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath
    else:
        path = _path

    ds = Dataset(path)
    if has_symlink_capability():
        assert ds.pathobj != ds.repo.pathobj

    # spotcheck that annex status reporting and availability evaluation
    # works
    assert_result_count(
        ds.status(annex='all', result_renderer=None),
        1,
        path=str(ds.pathobj / 'subdir' / 'annexed_file.txt'),
        key='MD5E-s5--275876e34cf609db118f3d84b799a790.txt',
        has_content=True,
        objloc=str(ds.repo.pathobj / '.git' / 'annex' / 'objects' /
                   # hashdir is different on windows
                   ('f33' if ds.repo.is_managed_branch() else '7p') /
                   ('94b' if ds.repo.is_managed_branch() else 'gp') /
                   'MD5E-s5--275876e34cf609db118f3d84b799a790.txt' /
                   'MD5E-s5--275876e34cf609db118f3d84b799a790.txt'))

    plain_recursive = ds.status(recursive=True, result_renderer=None)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for res in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        if 'link2' in str(res['path']):
            assert res['type'] == 'symlink', res
        else:
            assert res['type'] != 'symlink', res
        # every item must report its parent dataset
        assert_in('parentds', res)

    # bunch of smoke tests
    # query of '.' is same as no path
    eq_(plain_recursive,
        ds.status(path='.', recursive=True, result_renderer=None))
    # duplicate paths do not change things
    eq_(plain_recursive,
        ds.status(path=['.', '.'], recursive=True, result_renderer=None))
    # neither do nested paths
    eq_(plain_recursive,
        ds.status(path=['.', 'subds_modified'], recursive=True,
                  result_renderer=None))
    # when invoked in a subdir of a dataset it still reports on the full thing
    # just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = status(recursive=True, result_renderer=None)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive,
        ds.status(path=ds.path, recursive=True, result_renderer=None))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join('subds_modified', 'subds_lvl1_modified',
                    OBSCURE_FILENAME + u'_directory_untracked')
    apathobj = ds.pathobj / rpath
    apath = str(apathobj)
    # ds.repo.pathobj will have the symlink resolved
    arealpath = ds.repo.pathobj / rpath
    # TODO include explicit relative path in test
    for p in (rpath, apath, arealpath, None):
        if p is None:
            # change into the realpath of the dataset and
            # query with an explicit path
            with chpwd(ds.repo.path):
                res = ds.status(path=op.join('.', rpath),
                                result_renderer=None)
        else:
            res = ds.status(path=p, result_renderer=None)
        assert_result_count(
            res,
            1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    assert_result_count(
        ds.status(recursive=True, result_renderer=None),
        1,
        path=apath)
    # limiting recursion will exclude this particular path
    assert_result_count(
        ds.status(recursive=True, recursion_limit=1, result_renderer=None),
        0,
        path=apath)
    # negative limit is unlimited limit
    eq_(ds.status(recursive=True, recursion_limit=-1, result_renderer=None),
        ds.status(recursive=True, result_renderer=None))

def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                        where='dataset')
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # now that we have annex.key
    # three different IDs
    assert_equal(3, len(set([s['dsid'] for s in origres
                             if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] ==
                 assure_unicode(name)
                 for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        opj(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])