コード例 #1
0
def test_inline_args_etree_as_src():
    """An lxml Element passed positionally as ``src`` yields a schema."""
    url = uri.resolve(DATA_URI, "testcases/xml-base/schema.rng")
    schema_bytes = urlhandlers.pydata.dereference(url)
    schema_el = etree.fromstring(schema_bytes, base_url=url)

    assert etree.iselement(schema_el)

    # schema_el is handed over as src; it should be detected as an Element
    schema = rnginline.inline(schema_el)

    # The positive example lives next to the schema document
    positive_url = uri.resolve(url, "positive-1.xml")
    positive_xml = etree.fromstring(
        urlhandlers.pydata.dereference(positive_url))
    assert schema(positive_xml)
コード例 #2
0
def test_inline_args_etree_doc_as_src():
    """An lxml ElementTree (document) passed as ``src`` yields a schema."""
    url = uri.resolve(DATA_URI, "testcases/xml-base/schema.rng")
    schema_bytes = urlhandlers.pydata.dereference(url)
    schema_el = etree.fromstring(schema_bytes, base_url=url)

    # The tree wrapping the element is a document, not an element
    schema_root = schema_el.getroottree()
    assert not etree.iselement(schema_root)

    # Given the document (not the el) as src, the root el should be
    # pulled out and used
    schema = rnginline.inline(schema_root)

    positive_url = uri.resolve(url, "positive-1.xml")
    positive_xml = etree.fromstring(
        urlhandlers.pydata.dereference(positive_url))
    assert schema(positive_xml)
コード例 #3
0
    def makeurl(file_path, abs=False):
        """
        Build a URL that points at the filesystem path ``file_path``.

        The result is either a relative URL (a URI-reference with no scheme)
        or an absolute one carrying the ``file`` scheme. "Absolute" here
        describes the presence of a scheme in the URL; it says nothing about
        whether ``file_path`` itself is an absolute path.

        Args:
            file_path: The filesystem path the URL should point to
            abs: When ``True`` the URL is returned with a ``file`` scheme;
                otherwise the scheme-less relative form is returned
        Returns:
            A ``file`` URL pointing to ``file_path``

        Note:
            The current directory of the program has no effect on this function
        Examples:
            >>> from rnginline.urlhandlers import file
            >>> file.makeurl('/tmp/foo')
            '/tmp/foo'
            >>> file.makeurl('/tmp/foo', abs=True)
            'file:/tmp/foo'
            >>> file.makeurl('file.txt')
            'file.txt'
            >>> file.makeurl('file.txt', abs=True)
            'file:file.txt'
        """
        reject_bytes(file_path=file_path)

        quoted_path = quote(file_path, quoting_func=pathname2url)
        if abs is not True:
            # Scheme-less form: the quoted path is already the URL
            return quoted_path
        # Resolving against a bare "file:" base attaches the scheme
        return uri.resolve("file:", quoted_path)
コード例 #4
0
def test_override_default_base_uri():
    """A relative ``url`` is resolved against a caller-supplied default base."""
    testcase_dir_url = urlhandlers.pydata.makeurl(
        TESTPKG, "data/testcases/xml-base/")

    # "schema.rng" is relative, so it can only be located via the default base
    schema = rnginline.inline(
        url="schema.rng", default_base_uri=testcase_dir_url)

    positive_url = uri.resolve(testcase_dir_url, "positive-1.xml")
    positive_xml = etree.fromstring(
        urlhandlers.pydata.dereference(positive_url))

    assert schema(positive_xml)
コード例 #5
0
def test_urllib_urljoin_does_not_work_for_us():
    """The reason for the uri module to exist."""
    base = "custom://a/b/c/foo.txt"
    reference = "bar.txt"

    # bad: urljoin returns the reference alone for this custom scheme,
    # which is not what I'd expect
    joined_by_urllib = urllib.parse.urljoin(base, reference)
    assert joined_by_urllib == "bar.txt"

    # good! the reference replaces the last path segment of the base,
    # which is what I'd expect
    joined_by_uri = uri.resolve(base, reference)
    assert joined_by_uri == "custom://a/b/c/bar.txt"
コード例 #6
0
def test_inline_etree_el_with_no_base_uri_uses_default_base_uri():
    """An element lacking a document URL falls back to the default base URI."""
    base_url = urlhandlers.pydata.makeurl(TESTPKG, "data/testcases/xml-base/")
    schema_url = uri.resolve(base_url, "schema.rng")

    # Parsing from bytes without base_url leaves the document URL unset
    schema_el = etree.fromstring(urlhandlers.pydata.dereference(schema_url))
    assert schema_el.getroottree().docinfo.URL is None

    # The default-default base URI is the pwd, so something else is used to
    # demonstrate this. An unhandlable URI will result in a
    # NoAvailableHandlerError when the first href is dereferenced.
    with pytest.raises(NoAvailableHandlerError):
        rnginline.inline(etree=schema_el, default_base_uri="x:")

    # With a sensible default base URI the references resolve OK, even though
    # the XML document itself has no base URI
    schema = rnginline.inline(etree=schema_el, default_base_uri=base_url)

    positive_url = uri.resolve(base_url, "positive-1.xml")
    positive_xml = etree.fromstring(
        urlhandlers.pydata.dereference(positive_url))
    assert schema(positive_xml)
コード例 #7
0
def test_overridden_base_uri_must_be_uri_ref():
    """
    A ``base_uri`` override has to match the URI-reference grammar, i.e. be a
    relative or absolute URI; anything else is rejected with ValueError.
    """
    bad_uri = "x:/some/path/spaces not allowed/oops"
    assert not uri.is_uri_reference(bad_uri)

    # some random schema, not of any significance
    schema_url = uri.resolve(DATA_URI, "testcases/include-1/schema.rng")

    with pytest.raises(ValueError):
        rnginline.inline(schema_url, base_uri=bad_uri)
コード例 #8
0
    def _get_href_url(self, el):
        """
        Return the ``href`` attribute of ``el`` as a URL resolved against the
        element's base URI.
        """
        # validate_grammar_xml() ensures we have an href attr
        assert "href" in el.attrib

        # RELAX NG / XLink 1.0 permit various characters in href attrs which
        # are not permitted in URLs. These have to be escaped to make the value
        # a URL.
        # TODO: The spec references XLINK 1.0, but 1.1 is available which uses
        #       IRIs for href values. Consider supporting these.
        escaped_href = escape_reserved_characters(el.attrib["href"])

        # A base is always available: when the element has none of its own,
        # the default base URI is used.
        base_uri = self._get_base_uri(el)
        assert uri.is_uri(base_uri), (el, el.base)

        # Make the href absolute against the element's base URI
        return uri.resolve(base_uri, escaped_href)
コード例 #9
0
def test_provide_base_uri():
    """
    Check that a manually specified base URI is used for the source.
    """
    base_uri = urlhandlers.pydata.makeurl(
        TESTPKG, "data/testcases/xml-base/schema.rng")

    # A file object is used so that the inliner won't know the URI of the src
    schema_bytes = urlhandlers.pydata.dereference(base_uri)
    fileobj = io.BytesIO(schema_bytes)

    # base_uri is absolute, so the default base won't have any effect.
    schema = rnginline.inline(
        fileobj, base_uri=base_uri, default_base_uri="x:/blah")

    positive_url = uri.resolve(base_uri, "positive-1.xml")
    positive_xml = etree.fromstring(
        urlhandlers.pydata.dereference(positive_url))

    assert schema(positive_xml)
コード例 #10
0
def test_inline_url_arguments_are_resolved_against_default_base_uri():
    """A relative ``url`` argument is dereferenced as a cwd-based URL."""

    class Stop(Exception):
        """Raised by the mock handler to abort as soon as it is called."""

    # A mock handler that accepts every URL lets the dereference call be
    # observed; raising Stop halts inline() at that point.
    handler = mock.MagicMock()
    handler.can_handle.return_value = True
    handler.dereference.side_effect = Stop

    relative_url = "somefile.txt"

    with pytest.raises(Stop):
        rnginline.inline(url=relative_url, handlers=[handler])

    # The default base URL is the cwd
    cwd_url = urlhandlers.file.makeurl(rnginline._get_cwd(), abs=True)
    # The url provided should have been resolved against the default base:
    expected_url = uri.resolve(cwd_url, relative_url)
    assert expected_url.startswith(cwd_url)
    assert expected_url.endswith("somefile.txt")

    handler.dereference.assert_called_once_with(expected_url)
コード例 #11
0
 def _get_base_uri(self, el):
     """
     Return the base URI of ``el``, resolved against the default base URI.

     An element whose ``base`` is ``None`` is treated as having an empty
     base reference.
     """
     if el.base is None:
         base_reference = ""
     else:
         base_reference = el.base
     return uri.resolve(self.default_base_uri, base_reference)
コード例 #12
0
    def inline(self,
               src=None,
               etree=None,
               url=None,
               path=None,
               file=None,
               base_uri=None,
               create_validator=True):
        """
        Build a complete RELAX NG schema from a single source document.

        The source XML is loaded and every ``<include>``/``<externalRef>``
        element it contains is recursively loaded and inlined.

        URLs in <include>/<externalRef> elements are resolved against the base
        URL of their containing document, and fetched using one of this
        Inliner's urlhandlers.

        Exactly one of ``src``, ``etree``, ``url``, ``path`` and ``file`` must
        be given.

        Args:
            src: The source to load the schema from. Either an lxml.etree
                Element (or ElementTree, whose root is used), a URL,
                filesystem path or file-like object. A string is treated as a
                URL if it is a valid URI-reference, otherwise as a path.
            etree: Explicitly provide an lxml.etree Element as the source
            url: Explicitly provide a URL as the source. It is resolved
                against this Inliner's default base URI, so may be relative.
            path: Explicitly provide a path as the source
            file: Explicitly provide a file-like as the source. Its ``name``
                attribute is ignored; use ``base_uri`` to give it a base.
            base_uri: A URI to override the base URI of the grammar with.
                      Useful when the source doesn't have a sensible base URI,
                      e.g. passing sys.stdin as a file.
            create_validator: If True, an lxml RelaxNG validator is created
                from the loaded XML document and returned. If False then the
                loaded XML is returned.
        Returns:
            A ``lxml.etree.RelaxNG`` validator from the fully loaded and
            inlined XML, or the XML itself, depending on the
            ``create_validator`` argument.
        Raises:
            ValueError: if the number of source arguments given is not one,
                if ``src`` is of an unusable type, or if ``url`` or
                ``base_uri`` is not a valid URI-reference.
            RelaxngInlineError: (or subclass) is raised if the schema can't be
                loaded.
        """
        sources = (src, etree, url, path, file)
        provided = sum(candidate is not None for candidate in sources)
        if provided != 1:
            raise ValueError(
                "A single argument must be provided from src, "
                "etree, url, path or file. got {0:d}".format(provided))

        # Work out which of the explicit arguments the generic src stands for
        if src is not None:
            if _etree.iselement(src):
                # lxml.etree Element
                etree = src
            elif hasattr(src, "getroot"):
                # lxml.etree ElementTree: use its root element
                etree = src.getroot()
            elif isinstance(src, six.string_types):
                if not uri.is_uri_reference(src):
                    path = src
                else:
                    url = src
            elif hasattr(src, "read"):
                file = src
            else:
                raise ValueError(
                    "Don't know how to use src: {0!r}".format(src))

        # Remember whether the caller handed over the XML itself (as opposed
        # to something it gets loaded from); if so it is validated below.
        grammar_provided_directly = etree is not None

        if path is not None:
            assert url is None and etree is None
            # A path is turned into a URL and then handled like any other URL
            url = urlhandlers.file.makeurl(path)

        if file is not None:
            assert etree is None
            # Note that the file.name attr is purposefully ignored as it's not
            # possible in the general case to know whether it's a filename/path
            # or some other indicator like <stdin> or a file descriptor number.
            # base_uri can be used to safely provide a base URI.
            etree = self.parse_grammar_xml(file.read(), None)

        context = InlineContext()

        if url is not None:
            assert etree is None
            if not uri.is_uri_reference(url):
                raise ValueError(
                    "url was not a valid URL-reference: {0}".format(url))
            # IMPORTANT: resolving the input URL against the default base URI
            # is what allows the url to be a relative URI like foo/bar.rng
            # and still get handled by the filesystem handler, which requires
            # a file: URI scheme. Note also that if url is already absolute
            # with its own scheme then the default base is ignored in the
            # resolution process.
            etree = self.dereference_url(
                uri.resolve(self.default_base_uri, url), context)

        # By now one of the branches above must have produced an element
        assert etree is not None
        assert _etree.iselement(etree)

        if base_uri is not None:
            if not uri.is_uri_reference(base_uri):
                raise ValueError(
                    "base_uri is not a valid URI-reference: {0}".format(
                        base_uri))
            # Override the base by rewriting the URL of the owning document
            etree.getroottree().docinfo.URL = base_uri

        if grammar_provided_directly:
            # The XML to inline was directly provided, so it has to be
            # validated here. There's no need to validate that the element
            # has a base URI, as if it only includes absolute URLs it's not
            # needed.
            self.validate_grammar_xml(etree)

        schema = self.postprocess(
            self._inline(etree, context).perform_insertions())

        if create_validator is not True:
            return schema
        return self.create_validator(schema)
コード例 #13
0
def test_resolve_reference_must_be_uri_ref(ref, raises):
    """``uri.resolve`` raises ValueError for ``ref`` iff ``raises`` is True."""
    if raises is not True:
        # Expected to be accepted: simply must not raise
        uri.resolve("x:/foo", ref)
        return

    with pytest.raises(ValueError):
        uri.resolve("x:/foo", ref)
コード例 #14
0
def test_resolve_base_must_be_uri(base, raises):
    """``uri.resolve`` raises ValueError for ``base`` iff ``raises`` is True."""
    if raises is not True:
        # Expected to be accepted: simply must not raise
        uri.resolve(base, "")
        return

    with pytest.raises(ValueError):
        uri.resolve(base, "")
コード例 #15
0
def test_strict_resolve(base, reference, target, strict):
    """Resolving with the given ``strict`` setting produces ``target``."""
    resolved = uri.resolve(base, reference, strict=strict)
    assert resolved == target
コード例 #16
0
def test_resolve(base, reference, target):
    """Resolving ``reference`` against ``base`` produces ``target``."""
    resolved = uri.resolve(base, reference)
    assert resolved == target