def _test_serve_path_via_http(test_fpath, use_ssl, auth,
                              tmp_dir):  # pragma: no cover
    tmp_dir = Path(tmp_dir)
    test_fpath = Path(test_fpath)
    # first verify that the filesystem layer can encode this filename
    # in the current environment
    try:
        filesysencoding = sys.getfilesystemencoding()
        test_fpath_encoded = test_fpath.as_posix().encode(filesysencoding)
    except UnicodeEncodeError:  # pragma: no cover
        pytest.skip("Environment doesn't support unicode filenames")
    if test_fpath_encoded.decode(
            filesysencoding) != test_fpath.as_posix():  # pragma: no cover
        pytest.skip("Can't convert back/forth using %s encoding" %
                    filesysencoding)

    test_fpath_full = tmp_dir / test_fpath
    test_fpath_full.parent.mkdir(parents=True, exist_ok=True)
    test_fpath_full.write_text(
        f'some txt and a randint {random.randint(1, 10)}')

    @serve_path_via_http(tmp_dir, use_ssl=use_ssl, auth=auth)
    def test_path_and_url(path, url):
        def _urlopen(url, auth=None):
            req = Request(url)
            if auth:
                req.add_header(
                    "Authorization", b"Basic " + base64.standard_b64encode(
                        '{0}:{1}'.format(*auth).encode('utf-8')))
            return urlopen(req)

        # @serve_ should remove http_proxy from os.environ if it was present
        if not on_windows:
            assert_false('http_proxy' in os.environ)
        # get the "dir-view"
        dirurl = url + test_fpath.parent.as_posix()
        u = _urlopen(dirurl, auth)
        assert_true(u.getcode() == 200)
        html = u.read()
        # get the actual content
        file_html = _urlopen(url + url_quote(test_fpath.as_posix()),
                             auth).read().decode()
        # verify we got the right one
        eq_(file_html, test_fpath_full.read_text())

        if bs4 is None:
            return

        # MIH is not sure what this part below is supposed to do
        # possibly some kind of internal consistency test
        soup = bs4.BeautifulSoup(html, "html.parser")
        href_links = [txt.get('href') for txt in soup.find_all('a')]
        assert_true(len(href_links) == 1)
        parsed_url = f"{dirurl}/{href_links[0]}"
        u = _urlopen(parsed_url, auth)
        html = u.read().decode()
        eq_(html, file_html)

    test_path_and_url()
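
# For reference, a standalone sketch of the Basic-auth request construction
# used by _urlopen above; the URL and credentials in the usage comment are
# hypothetical and not part of the original test:
import base64
from urllib.request import Request

def basic_auth_request(url, user, password):
    # build a urllib Request carrying an HTTP Basic Authorization header
    token = base64.standard_b64encode(
        '{0}:{1}'.format(user, password).encode('utf-8'))
    req = Request(url)
    req.add_header("Authorization", b"Basic " + token)
    return req

# hypothetical usage, e.g. with urllib.request.urlopen:
# response = urllib.request.urlopen(
#     basic_auth_request("http://localhost:8080/", "user", "pass"))
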
Example #2
def test_ephemeral(ds_path=None, store_path=None, clone_path=None):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset, with the file content the assertions
    # further below expect
    ds = Dataset(dspath)
    ds.create(force=True)
    (dspath / file_test).write_text("some")
    (dspath / file_testsub).parent.mkdir(parents=True, exist_ok=True)
    (dspath / file_testsub).write_text("other")
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()),
                          "riastore",
                          new_store_ok=True)
    ds.push(to="riastore", data="anything")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id),
                      clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(store / ds.id[:3] / ds.id[3:] /
                                               'annex'))
    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet
        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True, file=file_test.as_posix())
        assert_result_count(res, 1, success=True, file=file_testsub.as_posix())

        # push back git history
        eph_clone.push(to=DEFAULT_REMOTE, data="nothing")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")
Example #3
def get_local_file_url(fname, compatibility='git-annex'):
    """Return OS specific URL pointing to a local file

    Parameters
    ----------
    fname : string
        Filename.  If not absolute, abspath is used
    compatibility : {'git', 'git-annex'}, optional
        On Windows, and only on that platform, file:// URLs may need to look
        different depending on the use case or consuming application. This
        switch selects different compatibility modes: 'git' for use with
        Git commands (e.g. `clone` or `submodule add`); 'git-annex' for
        Git-annex command input (e.g. `addurl`). On any other platform this
        setting has no effect.
    """
    path = Path(fname).absolute()
    if on_windows:
        path = path.as_posix()
        furl = 'file://{}{}'.format(
            '/' if compatibility == 'git' else '',
            urlquote(re.sub(r'([a-zA-Z]):', r'\1', path)))
    else:
        # TODO:  need to fix for all the encoding etc
        furl = str(URL(scheme='file', path=str(path)))
    return furl
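
# For illustration, a minimal sketch showing how get_local_file_url is meant
# to be used; the path is a hypothetical example, not taken from the original
# module, and the commented results follow from the two branches above:
def _example_local_file_url():
    url = get_local_file_url('/tmp/data/file.txt')
    # non-Windows: 'file:///tmp/data/file.txt'
    # Windows ('git'):       C:\data\file.txt -> 'file:///C/data/file.txt'
    # Windows ('git-annex'): C:\data\file.txt -> 'file://C/data/file.txt'
    return url
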
def test_create_push_url(detection_path=None, ds_path=None, store_path=None):

    store_path = Path(store_path)
    ds_path = Path(ds_path)
    detection_path = Path(detection_path)

    ds = Dataset(ds_path).create(force=True)
    ds.save()

    # patch SSHConnection to signal it was used:
    from datalad.support.sshconnector import SSHManager

    def detector(f, d):
        @wraps(f)
        def _wrapper(*args, **kwargs):
            d.touch()
            return f(*args, **kwargs)

        return _wrapper

    url = "ria+{}".format(store_path.as_uri())
    push_url = "ria+ssh://datalad-test{}".format(store_path.as_posix())
    assert not detection_path.exists()

    with patch('datalad.support.sshconnector.SSHManager.get_connection',
               new=detector(SSHManager.get_connection, detection_path)):

        ds.create_sibling_ria(url,
                              "datastore",
                              push_url=push_url,
                              new_store_ok=True)
        # used ssh_manager despite file-url hence used push-url (ria+ssh):
        assert detection_path.exists()

        # correct config in special remote:
        sr_cfg = ds.repo.get_special_remotes()[ds.siblings(
            name='datastore-storage')[0]['annex-uuid']]
        eq_(sr_cfg['url'], url)
        eq_(sr_cfg['push-url'], push_url)

        # git remote based on url (local path):
        eq_(ds.config.get("remote.datastore.url"),
            (store_path / ds.id[:3] / ds.id[3:]).as_posix())
        eq_(
            ds.config.get("remote.datastore.pushurl"),
            "ssh://datalad-test{}".format(
                (store_path / ds.id[:3] / ds.id[3:]).as_posix()))

        # git-push uses SSH:
        detection_path.unlink()
        ds.push('.', to="datastore", data='nothing')
        assert detection_path.exists()

        # data push
        # Note that the patching has no effect here, since the special remote
        # runs in a subprocess of git-annex, so we can't actually detect SSH
        # usage. However, the ORA remote is tested elsewhere - if that passes,
        # all should be good wrt `create-sibling-ria`.
        ds.repo.call_annex(['copy', '.', '--to', 'datastore-storage'])
Example #5
def setup_archive_remote(repo, archive_path):

    # for integration in a URL, we need POSIX version of the path
    archive_path = Path(archive_path)

    if 'DATALAD_TESTS_SSH' in os.environ:
        cfg = {'url': 'ria+ssh://datalad-test{}'
                      ''.format(archive_path.as_posix())}
    else:
        cfg = {'url': 'ria+{}'.format(archive_path.as_uri())}
    initexternalremote(repo, 'archive', 'ora', config=cfg)
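
# For illustration, a minimal sketch of the two ria URL forms the branches
# above produce; the archive path is a hypothetical example, not part of the
# original test setup:
def _example_ria_urls():
    archive_path = Path('/tmp/archive_store')  # hypothetical path
    ssh_url = 'ria+ssh://datalad-test{}'.format(archive_path.as_posix())
    # -> 'ria+ssh://datalad-test/tmp/archive_store'
    file_url = 'ria+{}'.format(archive_path.as_uri())
    # -> 'ria+file:///tmp/archive_store'
    return ssh_url, file_url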