def _test_serve_path_via_http(test_fpath, use_ssl, auth, tmp_dir): # pragma: no cover
    """Core check for serving a directory over HTTP(S).

    Creates ``test_fpath`` (with randomized content) under ``tmp_dir``,
    serves ``tmp_dir`` via the ``@serve_path_via_http`` decorator, and
    verifies the file can be fetched through the server — optionally with
    basic auth — and matches the on-disk content.

    Parameters
    ----------
    test_fpath : str or Path
      Relative path of the file to create and fetch (may contain
      non-ASCII characters; the environment is probed for support first).
    use_ssl : bool
      Whether the test server should serve over SSL.
    auth : tuple or None
      ``(user, password)`` for HTTP basic auth, or None for no auth.
    tmp_dir : str or Path
      Directory to create the file in and to serve.
    """
    tmp_dir = Path(tmp_dir)
    test_fpath = Path(test_fpath)
    # First verify that filesystem layer can encode this filename
    # verify first that we could encode file name in this environment
    try:
        filesysencoding = sys.getfilesystemencoding()
        test_fpath_encoded = str(test_fpath.as_posix()).encode(filesysencoding)
    except UnicodeEncodeError:  # pragma: no cover
        pytest.skip("Environment doesn't support unicode filenames")
    # round-trip must be lossless, otherwise comparisons below are meaningless
    if test_fpath_encoded.decode(
            filesysencoding) != test_fpath.as_posix():  # pragma: no cover
        pytest.skip("Can't convert back/forth using %s encoding"
                    % filesysencoding)

    # create the payload file; random bit makes stale-cache hits detectable
    test_fpath_full = tmp_dir / test_fpath
    test_fpath_full.parent.mkdir(parents=True, exist_ok=True)
    test_fpath_full.write_text(
        f'some txt and a randint {random.randint(1, 10)}')

    @serve_path_via_http(tmp_dir, use_ssl=use_ssl, auth=auth)
    def test_path_and_url(path, url):
        # GET `url`, attaching a basic-auth header when credentials are given
        def _urlopen(url, auth=None):
            req = Request(url)
            if auth:
                req.add_header(
                    "Authorization",
                    b"Basic " + base64.standard_b64encode(
                        '{0}:{1}'.format(*auth).encode('utf-8')))
            return urlopen(req)

        # @serve_ should remove http_proxy from the os.environ if was present
        if not on_windows:
            assert_false('http_proxy' in os.environ)

        # get the "dir-view"
        dirurl = url + test_fpath.parent.as_posix()
        u = _urlopen(dirurl, auth)
        assert_true(u.getcode() == 200)
        html = u.read()

        # get the actual content
        file_html = _urlopen(
            url + url_quote(test_fpath.as_posix()), auth).read().decode()
        # verify we got the right one
        eq_(file_html, test_fpath_full.read_text())

        if bs4 is None:
            # without BeautifulSoup we cannot parse the dir listing; stop here
            return

        # MIH is not sure what this part below is supposed to do
        # possibly some kind of internal consistency test
        soup = bs4.BeautifulSoup(html, "html.parser")
        href_links = [txt.get('href') for txt in soup.find_all('a')]
        assert_true(len(href_links) == 1)
        parsed_url = f"{dirurl}/{href_links[0]}"
        u = _urlopen(parsed_url, auth)
        html = u.read().decode()
        eq_(html, file_html)

    test_path_and_url()
def test_ephemeral(ds_path=None, store_path=None, clone_path=None):
    """Check ephemeral clones from a RIA store.

    The clone's annex directory must be a symlink into the store, file
    content must be accessible without an explicit `get`, and content
    written in the clone must land directly in the store's annex.
    """
    origin_path = Path(ds_path)
    store_dir = Path(store_path)
    f_top = Path('file1.txt')
    f_sub = Path('sub') / 'other.txt'

    # set up the original dataset and publish it into a (new) RIA store
    origin = Dataset(origin_path)
    origin.create(force=True)
    origin.save()
    origin.create_sibling_ria("ria+{}".format(store_dir.as_uri()),
                              "riastore",
                              new_store_ok=True)
    origin.push(to="riastore", data="anything")

    # obtain an ephemeral clone from that RIA store
    ephemeral = clone('ria+{}#{}'.format(store_dir.as_uri(), origin.id),
                      clone_path,
                      reckless="ephemeral")

    # the clone's annex must be a symlink into the store (store has bare repos!)
    annex_link = ephemeral.repo.dot_git / 'annex'
    assert_true(annex_link.is_symlink())
    store_annex = store_dir / origin.id[:3] / origin.id[3:] / 'annex'
    assert_true(annex_link.resolve().samefile(store_annex))

    if not ephemeral.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet

        # content must be readable without a prior `get`
        assert_equal((ephemeral.pathobj / f_top).read_text(), "some")
        assert_equal((ephemeral.pathobj / f_sub).read_text(), "other")

        # unlock, modify, and save a file in the ephemeral clone
        ephemeral.unlock(f_top)
        (ephemeral.pathobj / f_top).write_text("new content")
        ephemeral.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        fsck_res = ephemeral.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(fsck_res), 2)
        assert_result_count(fsck_res, 1, success=True, file=f_top.as_posix())
        assert_result_count(fsck_res, 1, success=True, file=f_sub.as_posix())

        # push the git history back, then update origin to observe the change
        ephemeral.push(to=DEFAULT_REMOTE, data="nothing")
        origin.update(merge=True, reobtain_data=True)
        assert_equal((origin.pathobj / f_top).read_text(), "new content")
def get_local_file_url(fname, compatibility='git-annex'):
    """Return OS specific URL pointing to a local file

    Parameters
    ----------
    fname : string
      Filename. If not absolute, abspath is used
    compatibility : {'git', 'git-annex'}, optional
      On Windows, and only on that platform, file:// URLs may need to
      look different depending on the use case or consuming application.
      This switch selects different compatibility modes: 'git' for use
      with Git commands (e.g. `clone` or `submodule add`); 'git-annex'
      for Git-annex command input (e.g. `addurl`). On any other platform
      this setting has no effect.

    Returns
    -------
    str
      A file:// URL for `fname`.
    """
    path = Path(fname).absolute()
    if on_windows:
        path = path.as_posix()
        # Git wants a leading slash before the drive (file:///C...),
        # git-annex does not; the drive-letter colon is stripped either way.
        # NOTE(review): the un-anchored regex would strip ANY "<letter>:"
        # occurrence, not just the drive prefix — harmless for valid Windows
        # paths (which cannot contain ':'), but worth confirming.
        furl = 'file://{}{}'.format(
            '/' if compatibility == 'git' else '',
            urlquote(re.sub(r'([a-zA-Z]):', r'\1', path)))
    else:
        # TODO: need to fix for all the encoding etc
        furl = str(URL(scheme='file', path=str(path)))
    return furl
def test_create_push_url(detection_path=None, ds_path=None, store_path=None):
    """Verify `create-sibling-ria` honors a dedicated push_url and that
    subsequent git pushes to the sibling actually go over SSH."""
    store_path = Path(store_path)
    ds_path = Path(ds_path)
    detection_path = Path(detection_path)
    ds = Dataset(ds_path).create(force=True)
    ds.save()

    # patch SSHConnection to signal it was used:
    from datalad.support.sshconnector import SSHManager

    def spy(func, marker):
        # wrap `func` so that every call leaves a filesystem marker behind
        @wraps(func)
        def _wrapper(*args, **kwargs):
            marker.touch()
            return func(*args, **kwargs)
        return _wrapper

    ria_url = "ria+{}".format(store_path.as_uri())
    ria_push_url = "ria+ssh://datalad-test{}".format(store_path.as_posix())
    assert not detection_path.exists()

    with patch('datalad.support.sshconnector.SSHManager.get_connection',
               new=spy(SSHManager.get_connection, detection_path)):
        ds.create_sibling_ria(ria_url, "datastore",
                              push_url=ria_push_url,
                              new_store_ok=True)
        # used ssh_manager despite file-url hence used push-url (ria+ssh):
        assert detection_path.exists()

        # correct config in special remote:
        storage_uuid = ds.siblings(name='datastore-storage')[0]['annex-uuid']
        sr_cfg = ds.repo.get_special_remotes()[storage_uuid]
        eq_(sr_cfg['url'], ria_url)
        eq_(sr_cfg['push-url'], ria_push_url)

        # plain git remote: fetch URL is the local path, push URL is SSH
        repo_in_store = store_path / ds.id[:3] / ds.id[3:]
        eq_(ds.config.get("remote.datastore.url"),
            repo_in_store.as_posix())
        eq_(ds.config.get("remote.datastore.pushurl"),
            "ssh://datalad-test{}".format(repo_in_store.as_posix()))

        # git-push uses SSH:
        detection_path.unlink()
        ds.push('.', to="datastore", data='nothing')
        assert detection_path.exists()

        # data push
        # Note, that here the patching has no effect, since the special remote
        # is running in a subprocess of git-annex. Hence we can't detect SSH
        # usage really. However, ORA remote is tested elsewhere - if it
        # succeeds all should be good wrt `create-sibling-ria`.
        ds.repo.call_annex(['copy', '.', '--to', 'datastore-storage'])
def setup_archive_remote(repo, archive_path):
    """Register an 'ora' special remote named 'archive' on `repo`.

    The remote URL points at `archive_path`; an SSH URL is used when the
    test setup requests it via the DATALAD_TESTS_SSH environment variable,
    otherwise a local file URL.
    """
    # for integration in a URL, we need POSIX version of the path
    archive_path = Path(archive_path)
    use_ssh = 'DATALAD_TESTS_SSH' in os.environ
    url = (
        'ria+ssh://datalad-test{}'.format(archive_path.as_posix())
        if use_ssh
        else 'ria+{}'.format(archive_path.as_uri())
    )
    initexternalremote(repo, 'archive', 'ora', config={'url': url})