def test_uninstall_multiple_paths(path):
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    subds.save(recursive=True)
    ok_clean_git(subds.path)
    # needs to be able to add a combination of staged files, modified submodule,
    # and untracked files
    ds.save(recursive=True)
    ok_clean_git(ds.path)
    # drop content of all 'kill' files
    topfile = 'kill'
    deepfile = opj('deep', 'dir', 'kill')
    # use a tuple not a list! should also work
    ds.drop((topfile, deepfile), check=False)
    ok_clean_git(ds.path)
    files_left = glob(opj(ds.path, '*', '*', '*')) + glob(opj(ds.path, '*'))
    ok_(all([f.endswith('keep') for f in files_left if exists(f) and not isdir(f)]))
    ok_(not ds.repo.file_has_content(topfile))
    ok_(not subds.repo.file_has_content(opj(*psplit(deepfile)[1:])))
    # remove handles for all 'kill' files
    ds.remove([topfile, deepfile], check=False)
    ok_clean_git(ds.path)
    files_left = glob(opj(ds.path, '*', '*', '*')) + glob(opj(ds.path, '*'))
    ok_(all([f.endswith('keep') for f in files_left if exists(f) and not isdir(f)]))
    ok_(not any([f.endswith(topfile) for f in files_left]))
def test_kill(path):
    # nested datasets with load
    ds = Dataset(path).create()
    testfile = opj(ds.path, "file.dat")
    with open(testfile, 'w') as f:
        f.write("load")
    ds.save("file.dat")
    subds = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    ok_clean_git(ds.path)

    # and we fail to remove since content can't be dropped
    res = ds.remove(on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error', path=testfile)
    # Following two assertions on message are relying on the actual error.
    # We have a second result with status 'impossible' for the ds, that we need
    # to filter out for those assertions:
    err_result = [r for r in res if r['status'] == 'error'][0]
    assert_result_values_cond(
        [err_result], 'message',
        lambda x: "configured minimum number of copies not found" in x or
                  "Could only verify the existence of 0 out of 1 necessary copies" in x
    )
    eq_(ds.remove(recursive=True, check=False, result_xfm='datasets'),
        [subds, ds])
    ok_(not exists(path))
def test_install_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save(message='addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.save('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
def test_clean_subds_removal(path):
    ds = Dataset(path).create()
    subds1 = ds.create('one')
    subds2 = ds.create('two')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['one', 'two'])
    ok_clean_git(ds.path)
    # now kill one
    res = ds.remove('one', result_xfm=None)
    # subds1 got uninstalled, and ds got the removal of subds1 saved
    assert_result_count(res, 1, path=subds1.path, action='uninstall', status='ok')
    assert_result_count(res, 1, path=subds1.path, action='remove', status='ok')
    assert_result_count(res, 1, path=ds.path, action='save', status='ok')
    ok_(not subds1.is_installed())
    ok_clean_git(ds.path)
    # two must remain
    eq_(ds.subdatasets(result_xfm='relpaths'), ['two'])
    # one is gone
    assert(not exists(subds1.path))
    # and now again, but this time remove something that is not installed
    ds.create('three')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ds.uninstall('two')
    ok_clean_git(ds.path)
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ok_(not subds2.is_installed())
    assert(exists(subds2.path))
    res = ds.remove('two', result_xfm='datasets')
    ok_clean_git(ds.path)
    # subds2 was already uninstalled, now ds got the removal of subds2 saved
    assert(not exists(subds2.path))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['three'])
    eq_(res, [subds2, ds])
def test_ssh_manager_close():
    manager = SSHManager()

    # check for previously existing sockets:
    existed_before_1 = exists(opj(manager.socket_dir, 'localhost'))
    existed_before_2 = exists(opj(manager.socket_dir, 'datalad-test'))

    manager.get_connection('ssh://localhost').open()
    manager.get_connection('ssh://datalad-test').open()
    if existed_before_1 and existed_before_2:
        # we need one connection to be closed and therefore being opened
        # by `manager`
        manager.get_connection('ssh://localhost').close()
        manager.get_connection('ssh://localhost').open()

    ok_(exists(opj(manager.socket_dir, get_connection_hash('localhost'))))
    ok_(exists(opj(manager.socket_dir, get_connection_hash('datalad-test'))))

    manager.close()

    still_exists_1 = exists(opj(manager.socket_dir, 'localhost'))
    still_exists_2 = exists(opj(manager.socket_dir, 'datalad-test'))

    eq_(existed_before_1, still_exists_1)
    eq_(existed_before_2, still_exists_2)
def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__
    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
def test_install_list(path, top_path):
    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
def test_GitRepo_pull(test_path, orig_path, clone_path):
    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")
    clone.pull()
    ok_(op.exists(op.join(clone_path, filename)))

    # While at it, let's test _get_remotes_having_commit a bit
    clone.add_remote("very_origin", test_path)
    clone.fetch("very_origin")
    eq_(
        clone._get_remotes_having_commit(clone.get_hexsha()),
        ['origin']
    )
    prev_commit = clone.get_hexsha('HEAD^')
    eq_(
        set(clone._get_remotes_having_commit(prev_commit)),
        {'origin', 'very_origin'}
    )
def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
    assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
    ok_(subds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds.is_installed())
def test_help():
    stdout, stderr = run_main(['--help'])

    # Let's extract section titles:
    # (wrap filter() in list() so the result is subscriptable on Python 3)
    sections = list(filter(re.compile('[a-zA-Z ]{4,50}:').match, stdout.split('\n')))
    ok_(sections[0].startswith('Usage:'))  # == Usage: nosetests [-h] if running using nose
    assert_equal(sections[1:], ['Positional arguments:', 'Options:'])
def test_ssh_get_connection():
    manager = SSHManager()
    assert manager._socket_dir is None, \
        "Should be unset upon initialization. Got %s" % str(manager._socket_dir)
    c1 = manager.get_connection('ssh://localhost')
    assert manager._socket_dir, "Should be set after interactions with the manager"
    assert_is_instance(c1, SSHConnection)

    # subsequent call returns the very same instance:
    ok_(manager.get_connection('ssh://localhost') is c1)

    # fail on malformed URls (meaning: our fancy URL parser can't correctly
    # deal with them):
    #assert_raises(ValueError, manager.get_connection, 'localhost')
    # we now allow those simple specifications of host to get_connection
    c2 = manager.get_connection('localhost')
    assert_is_instance(c2, SSHConnection)

    # but should fail if it looks like something else
    assert_raises(ValueError, manager.get_connection, 'localhost/')
    assert_raises(ValueError, manager.get_connection, ':localhost')

    # we can do what urlparse cannot
    # assert_raises(ValueError, manager.get_connection, 'someone@localhost')
    # next one is considered a proper url by urlparse (netloc:'',
    # path='/localhost), but eventually gets turned into SSHRI(hostname='ssh',
    # path='/localhost') -- which is fair IMHO -> invalid test
    # assert_raises(ValueError, manager.get_connection, 'ssh:/localhost')

    manager.close()
def test_GitRepo_add(src, path):
    gr = GitRepo.clone(src, path)
    filename = get_most_obscure_supported_name()
    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")
    added = gr.add(filename)

    eq_(added, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    # uncommitted:
    ok_(gr.dirty)

    filename = "another.txt"
    with open(op.join(path, filename), 'w') as f:
        f.write("Another file to add to git")

    # include committing:
    added2 = gr.add(filename)
    gr.commit(msg="Add two files.")
    eq_(added2, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))

    ok_clean_git(path)
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost",
                                        identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
        top_repo.update_submodule('2', init=True)
    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    # deinit of the modified submodule must fail without force:
    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
def _test_match_basic(matcher, query):
    extracts = dict(
        xpaths={'text': 'text()'},
        csss={'favorite': '.class1::text'}
    )
    m = matcher(query, **extracts)

    mg = m(dict(response="<div></div>"))
    ok_(inspect.isgenerator(mg))
    eq_(list(mg), [])  # there should be no hits

    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    hits = list(mg)
    eq_(len(hits), 3)
    for hit, a_html, a_text, class1_text in zip(
            hits, sample1.a_htmls, sample1.a_texts, sample1.class1_texts):
        ok_(hit['response'])
        eq_(hit['match'], a_html)
        eq_(hit['text'], a_text)
        eq_(hit.get('favorite', None), class1_text)

    m = matcher(query, min_count=4, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)

    m = matcher(query, max_count=2, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)
def test_ssh_open_close(tfile1):
    manager = SSHManager()

    path = opj(manager.socket_dir, get_connection_hash('localhost'))
    # TODO: facilitate the test when it didn't exist
    existed_before = exists(path)
    print("%s existed: %s" % (path, existed_before))

    c1 = manager.get_connection('ssh://localhost')
    c1.open()
    # control master exists for sure now
    ok_(exists(path))

    # use connection to execute remote command:
    local_home = os.path.expanduser('~')
    # we list explicitly local HOME since we override it in module_setup
    out, err = c1('ls -a %r' % local_home)
    remote_ls = [entry for entry in out.splitlines()
                 if entry != '.' and entry != '..']
    local_ls = os.listdir(local_home)
    eq_(set(remote_ls), set(local_ls))

    # now test for arguments containing spaces and other pleasant symbols
    out, err = c1('ls -l {}'.format(sh_quote(tfile1)))
    assert_in(tfile1, out)
    eq_(err, '')

    c1.close()
    # control master doesn't exist anymore:
    ok_(exists(path) == existed_before)
def test_clone_isnot_recursive(src, path_nr, path_r):
    ds = clone(src, path_nr, result_xfm='datasets', return_type='item-or-list')
    ok_(ds.is_installed())
    # check nothing is unintentionally installed
    subdss = ds.subdatasets(recursive=True)
    assert_result_count(subdss, len(subdss), state='absent')
    # this also means, subdatasets to be listed as not fulfilled:
    eq_(set(ds.subdatasets(recursive=True, fulfilled=False,
                           result_xfm='relpaths')),
        {'subm 1', '2'})
def test_create_test_dataset():
    # rudimentary smoke test
    from datalad.api import create_test_dataset
    with swallow_logs(), swallow_outputs():
        dss = create_test_dataset(spec='2/1-2')
    ok_(5 <= len(dss) <= 7)  # at least five - 1 top, two on top level, 1 in each
    for ds in dss:
        ok_clean_git(ds, annex=None)  # some of them are annex but we just don't check
        ok_(len(glob(opj(ds, 'file*'))))
def test_GitRepo_equals(path1, path2):
    repo1 = GitRepo(path1)
    repo2 = GitRepo(path1)
    ok_(repo1 == repo2)
    eq_(repo1, repo2)
    repo2 = GitRepo(path2)
    neq_(repo1, repo2)
    ok_(repo1 != repo2)
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install('ds1', source=src)
        ds2 = install('ds2', source=src)
        results = get([opj('ds1', 'test-annex.dat')] + glob(opj('ds2', '*.dat')))
        # each ds has one file
        eq_(len(results), 2)
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_install_dataset_from_instance(src, dst):
    origin = Dataset(src)
    clone = install(source=origin, path=dst)

    assert_is_instance(clone, Dataset)
    ok_startswith(clone.path, dst)
    ok_(clone.is_installed())
    ok_(GitRepo.is_valid_repo(clone.path))
    ok_clean_git(clone.path, annex=None)
    assert_in('INFO.txt', clone.repo.get_indexed_files())
def test_inject(path):
    ds = Dataset(path).create(force=True)
    ok_(ds.repo.is_dirty())
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
def test_no_interaction_with_untracked_content(path):
    # extracted from what was a metadata test originally
    ds = Dataset(opj(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)
    subds.remove(opj('.datalad', 'config'), if_dirty='ignore')
    ok_(not exists(opj(subds.path, '.datalad', 'config')))
    # this will only work, if `remove` didn't do anything stupid and
    # caused all content to be saved
    subds.create('subsub', force=True)
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in current understanding it is indeed a url but an 'ssh', implicit=True, not just
    # a useless scheme=weired with a hope to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL
    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
def test_target_ssh_simple(origin, src_path, target_rootpath):
    # prepare src
    source = install(src_path, source=origin,
                     result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source, name="local_target",
                       sshurl="ssh://*****:*****@

    @with_testsui(responses=["yes"])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace',
            ui=True,
        )

    interactive_assert_create_sshwebserver()

    eq_("ssh://localhost" + urlquote(target_path),
        source.repo.get_remote_url("local_target"))
    ok_(source.repo.get_remote_url("local_target", push=True) is None)

    # ensure target tree actually replaced by source
    assert_false(exists(opj(target_path, 'random')))

    if src_is_annex:
        lclcfg = AnnexRepo(src_path).config
        eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
        # valid uuid
        eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
        # should be added too, even if URL matches prior state
        eq_(lclcfg.get('remote.local_target.push'), 'master')

    # again, by explicitly passing urls. Since we are on localhost, the
    # local path should work:
    cpkwargs = dict(
        dataset=source,
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=target_path,
        target_url=target_path,
        target_pushurl="ssh://localhost" + target_path,
        ui=True,
    )

    @with_testsui(responses=['yes'])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(existing='replace', **cpkwargs)

    interactive_assert_create_sshwebserver()

    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        eq_(target_description, target_path)

    eq_(target_path, source.repo.get_remote_url("local_target"))
    eq_("ssh://localhost" + target_path,
        source.repo.get_remote_url("local_target", push=True))

    assert_publish_with_ui(target_path)

    # now, push should work:
    publish(dataset=source, to="local_target")

    # and we should be able to 'reconfigure'
    def process_digests_mtimes(digests, mtimes):
        # it should have triggered a hook, which would have created log and
        # metadata files
        check_metadata = False
        for part in 'logs', 'metadata':
            metafiles = [
                k for k in digests
                if k.startswith(_path_('.git/datalad/%s/' % part))
            ]
            # This is in effect ONLY if we have "compatible" datalad installed
            # on remote end. ATM we don't have easy way to guarantee that
            # AFAIK (yoh), so let's not check/enforce (TODO)
            # assert(len(metafiles) >= 1)

            # we might have 2 logs if timestamps do not collide ;)
            # Let's actually do it to some degree
            if part == 'logs':
                # always should have those:
                assert (len(metafiles) >= 1)
                with open(opj(target_path, metafiles[0])) as f:
                    if 'no datalad found' not in f.read():
                        check_metadata = True
            if part == 'metadata':
                eq_(len(metafiles), bool(check_metadata))
            for f in metafiles:
                digests.pop(f)
                mtimes.pop(f)
        # and just pop some leftovers from annex
        for f in list(digests):
            if f.startswith('.git/annex/mergedrefs'):
                digests.pop(f)
                mtimes.pop(f)

    orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(orig_digests, orig_mtimes)

    import time
    time.sleep(0.1)  # just so that mtimes change
    assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
    digests, mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(digests, mtimes)

    assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

    # but some files should have been modified
    modified_files = {
        k for k in mtimes
        if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
    }
    # collect which files were expected to be modified without incurring any changes
    ok_modified_files = {
        _path_('.git/hooks/post-update'),
        'index.html',
    }
    ok_modified_files.add(_path_('.git/config'))
    ok_modified_files.update(
        {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
    # it seems that with some recent git behavior has changed a bit
    # and index might get touched
    if _path_('.git/index') in modified_files:
        ok_modified_files.add(_path_('.git/index'))
    ok_(modified_files.issuperset(ok_modified_files))
def _test_target_ssh_inherit(standardgroup, ui, use_ssh, src_path, target_path):
    ds = Dataset(src_path).create()
    if use_ssh:
        target_url = 'localhost:%s' % target_path
    else:
        target_url = target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using current
    # user's group
    ds.create_sibling(target_url, name=remote, shared='group',
                      group=os.getgid(), ui=ui)  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset... a few of the nested ones
    # A known hiccup happened when there
    # is also subsub ds added - we might incorrectly traverse and not prepare
    # sub first for subsub to inherit etc
    parent_ds = ds
    subdss = []
    nlevels = 2  # gets slow: 1 - 43 sec, 2 - 49 sec , 3 - 69 sec
    for levels in range(nlevels):
        subds = parent_ds.create('sub')
        create_tree(subds.path, {'sub.dat': 'lots of data'})
        parent_ds.save('sub', recursive=True)
        ok_file_under_git(subds.path, 'sub.dat', annexed=True)
        parent_ds = subds
        subdss.append(subds)

    target_subdss = [
        Dataset(opj(*([target_path] + ['sub'] * (i + 1))))
        for i in range(nlevels)
    ]
    # since we do not have yet/thus have not used an option to record to publish
    # to that sibling by default (e.g. --set-upstream), if we run just ds.publish
    # -- should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to'
    )
    ds.publish(to=remote)  # should be ok, non recursive; BUT it (git or us?) would
                           # create an empty sub/ directory
    assert_postupdate_hooks(target_path, installed=ui)
    for target_sub in target_subdss:
        ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 1 + len(subdss))
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, len(subdss),
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))

    # Finally publishing with inheritance
    ds.publish(to=remote, recursive=True, missing='inherit')
    assert_postupdate_hooks(target_path, installed=ui)

    def check_dss():
        # we added the remote and set all the
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote),
                'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote),
                standardgroup or '')

        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then content should be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))

    check_dss()
    # and it should be ok to reconfigure the full hierarchy of datasets
    # while "inheriting". No URL must be specified, and we must not blow
    # but just issue a warning for the top level dataset which has no super,
    # so cannot inherit anything - use case is to fixup/establish the full
    # hierarchy on the remote site
    with swallow_logs(logging.WARNING) as cml:
        out = ds.create_sibling(
            None, name=remote, existing="reconfigure", inherit=True,
            ui=ui, recursive=True)
        eq_(len(out), 1 + len(subdss))
        assert_in("Cannot determine super dataset", cml.out)

    check_dss()
def test_local_path_target_dir(path):
    path = Path(path)
    ds_main = Dataset(path / "main").create()

    ds_main.create_sibling(
        name="abspath-targetdir",
        sshurl=str(path / "a"), target_dir="tdir")
    ok_((path / "a" / "tdir").exists())

    ds_main.create_sibling(
        name="relpath-bound-targetdir",
        sshurl=os.path.relpath(str(path / "b"), ds_main.path),
        target_dir="tdir")
    ok_((path / "b" / "tdir").exists())

    with chpwd(path):
        create_sibling(
            dataset=ds_main.path,
            name="relpath-unbound-targetdir",
            sshurl="c", target_dir="tdir")
    ok_((path / "c" / "tdir").exists())

    ds_main.create("subds")

    ds_main.create_sibling(
        name="rec-plain-targetdir", recursive=True,
        sshurl=str(path / "d"), target_dir="tdir")
    ok_((path / "d" / "tdir" / "subds").exists())

    ds_main.create_sibling(
        name="rec-template-targetdir", recursive=True,
        sshurl=str(path / "e"), target_dir="d%RELNAME")
    ok_((path / "e" / "d").exists())
    ok_((path / "e" / "d-subds").exists())
def test_same_website():
    ok_(same_website("http://a.b", "http://a.b/2014/01/xxx/"))
    ok_(same_website("http://a.b/page/2/", "http://a.b/2014/01/xxx/"))
    ok_(same_website("https://a.b/page/2/", "http://a.b/2014/01/xxx/"))
    ok_(same_website("http://a.b/page/2/", "https://a.b/2014/01/xxx/"))