def test_inline_args_etree_as_src():
    """An lxml Element given positionally as ``src`` is accepted."""
    schema_url = uri.resolve(DATA_URI, "testcases/xml-base/schema.rng")
    schema_bytes = urlhandlers.pydata.dereference(schema_url)
    schema_el = etree.fromstring(schema_bytes, base_url=schema_url)
    assert etree.iselement(schema_el)

    # Hand the element over as src; it should be detected as an Element.
    schema = rnginline.inline(schema_el)

    instance_url = uri.resolve(schema_url, "positive-1.xml")
    instance = etree.fromstring(urlhandlers.pydata.dereference(instance_url))
    assert schema(instance)
def test_inline_args_etree_doc_as_src():
    """An lxml ElementTree (document) given as ``src`` is accepted."""
    schema_url = uri.resolve(DATA_URI, "testcases/xml-base/schema.rng")
    schema_bytes = urlhandlers.pydata.dereference(schema_url)
    schema_el = etree.fromstring(schema_bytes, base_url=schema_url)

    schema_doc = schema_el.getroottree()
    assert not etree.iselement(schema_doc)

    # Pass the etree document (not the element) as src; the root element
    # should be pulled out of it and used.
    schema = rnginline.inline(schema_doc)

    instance_url = uri.resolve(schema_url, "positive-1.xml")
    instance = etree.fromstring(urlhandlers.pydata.dereference(instance_url))
    assert schema(instance)
def makeurl(file_path, abs=False):
    """
    Build a URL that points at the filesystem path ``file_path``.

    The URL is relative unless ``abs`` is ``True``. "Absolute" describes the
    URL, not the path: an absolute URL carries the ``file`` scheme, whereas a
    relative URL (a URI-reference) has no scheme.

    Args:
        file_path: The filesystem path the URL should point to. Passed
            through ``reject_bytes()`` before being quoted.
        abs: ``True`` to return an absolute URL (with the ``file`` scheme).
            Any other value returns the scheme-less relative URL.
    Returns:
        A ``file`` URL pointing to ``file_path``
    Note:
        The current directory of the program has no effect on this function
    Examples:
        >>> from rnginline.urlhandlers import file
        >>> file.makeurl('/tmp/foo')
        '/tmp/foo'
        >>> file.makeurl('/tmp/foo', abs=True)
        'file:/tmp/foo'
        >>> file.makeurl('file.txt')
        'file.txt'
        >>> file.makeurl('file.txt', abs=True)
        'file:file.txt'
    """
    reject_bytes(file_path=file_path)
    quoted_path = quote(file_path, quoting_func=pathname2url)

    # Only the literal True asks for the scheme to be attached.
    return uri.resolve("file:", quoted_path) if abs is True else quoted_path
def test_override_default_base_uri():
    """A relative ``url`` loads when a default base URI is supplied."""
    base = urlhandlers.pydata.makeurl(TESTPKG, "data/testcases/xml-base/")
    schema = rnginline.inline(url="schema.rng", default_base_uri=base)

    instance_bytes = urlhandlers.pydata.dereference(
        uri.resolve(base, "positive-1.xml"))
    assert schema(etree.fromstring(instance_bytes))
def test_urllib_urljoin_does_not_work_for_us():
    """The reason for the uri module to exist."""
    base = "custom://a/b/c/foo.txt"
    reference = "bar.txt"

    # bad: urljoin hands back the bare reference -- not what I'd expect
    joined = urllib.parse.urljoin(base, reference)
    assert joined == "bar.txt"

    # good: uri.resolve() swaps the last path segment -- what I'd expect
    resolved = uri.resolve(base, reference)
    assert resolved == "custom://a/b/c/bar.txt"
def test_inline_etree_el_with_no_base_uri_uses_default_base_uri():
    """An element whose document has no URL falls back to the default base."""
    base_url = urlhandlers.pydata.makeurl(TESTPKG, "data/testcases/xml-base/")
    schema_url = uri.resolve(base_url, "schema.rng")

    # Parsed without base_url, so the document carries no URL of its own.
    schema_el = etree.fromstring(urlhandlers.pydata.dereference(schema_url))
    assert schema_el.getroottree().docinfo.URL is None

    # The default-default base URI is the pwd, so use something else to
    # demonstrate this. An unhandlable URI results in a
    # NoAvailableHandlerError when the first href is dereferenced.
    with pytest.raises(NoAvailableHandlerError):
        rnginline.inline(etree=schema_el, default_base_uri="x:")

    # With a sensible default base URI the references resolve OK, even
    # though the XML document itself has no base URI.
    schema = rnginline.inline(etree=schema_el, default_base_uri=base_url)

    instance_url = uri.resolve(base_url, "positive-1.xml")
    instance = etree.fromstring(urlhandlers.pydata.dereference(instance_url))
    assert schema(instance)
def test_overridden_base_uri_must_be_uri_ref():
    """
    A ``base_uri`` override has to match the URI-reference grammar.

    In other words it must be a relative or an absolute URI; anything else
    is rejected with a ValueError.
    """
    bad_uri = "x:/some/path/spaces not allowed/oops"
    assert not uri.is_uri_reference(bad_uri)

    with pytest.raises(ValueError):
        # some random schema, not of any significance
        any_schema = uri.resolve(DATA_URI, "testcases/include-1/schema.rng")
        rnginline.inline(any_schema, base_uri=bad_uri)
def _get_href_url(self, el):
    """
    Return the absolute URL referenced by the ``href`` attribute of ``el``.

    The attribute value is escaped into URL form and then resolved against
    the base URI that ``self._get_base_uri(el)`` reports for the element.
    """
    # validate_grammar_xml() ensures we have an href attr
    assert "href" in el.attrib
    raw_href = el.attrib["href"]

    # RELAX NG / XLink 1.0 permit various characters in href attrs which
    # are not permitted in URLs. They must be escaped for the value to be
    # a URL.
    # TODO: The spec references XLINK 1.0, but 1.1 is available which uses
    #       IRIs for href values. Consider supporting these.
    escaped_href = escape_reserved_characters(raw_href)

    # A base URI is always available: an element without one of its own
    # still gets the default base URI.
    base_uri = self._get_base_uri(el)
    assert uri.is_uri(base_uri), (el, el.base)

    # Make the href absolute against the element's base URL.
    return uri.resolve(base_uri, escaped_href)
def test_provide_base_uri():
    """
    Check that a base URI can be specified by hand for the source.
    """
    base_uri = urlhandlers.pydata.makeurl(
        TESTPKG, "data/testcases/xml-base/schema.rng")

    # A file object is used so the inliner can't know the URI of the src.
    schema_bytes = urlhandlers.pydata.dereference(base_uri)
    fileobj = io.BytesIO(schema_bytes)

    schema = rnginline.inline(
        fileobj,
        base_uri=base_uri,
        # Our base URI is absolute, so the default base has no effect.
        default_base_uri="x:/blah")

    instance_url = uri.resolve(base_uri, "positive-1.xml")
    instance = etree.fromstring(urlhandlers.pydata.dereference(instance_url))
    assert schema(instance)
def test_inline_url_arguments_are_resolved_against_default_base_uri():
    """A relative ``url`` argument is dereferenced relative to the cwd URL."""

    class Stop(Exception):
        """Raised by the mocked handler when asked to dereference."""

    # A mocked handler lets the dereference call be observed.
    handler = mock.MagicMock()
    handler.can_handle.return_value = True
    handler.dereference.side_effect = Stop

    url = "somefile.txt"
    with pytest.raises(Stop):
        rnginline.inline(url=url, handlers=[handler])

    # The default base URL is the cwd.
    cwd_url = urlhandlers.file.makeurl(rnginline._get_cwd(), abs=True)

    # The url we provided should have been resolved against that base.
    expected_url = uri.resolve(cwd_url, url)
    assert expected_url.startswith(cwd_url)
    assert expected_url.endswith("somefile.txt")

    handler.dereference.assert_called_once_with(expected_url)
def _get_base_uri(self, el):
    """
    Return the base URI of ``el`` resolved against ``self.default_base_uri``.

    An element with no base (``el.base is None``) is treated as having an
    empty reference, so the result comes from the default base URI alone.
    """
    el_base = el.base
    if el_base is None:
        el_base = ""
    return uri.resolve(self.default_base_uri, el_base)
def inline(self, src=None, etree=None, url=None, path=None, file=None,
           base_uri=None, create_validator=True):
    """
    Load a RELAX NG schema from an XML document and inline its references.

    Any ``<include>``/``<externalRef>`` elements are recursively loaded and
    inlined to form a complete schema. URLs in those elements are resolved
    against the base URL of their containing document, and fetched using
    one of this Inliner's urlhandlers.

    Exactly one of ``src``, ``etree``, ``url``, ``path`` and ``file`` must
    be given.

    Args:
        src: The source to load the schema from. Either an lxml.etree
            Element or ElementTree, a URL, a filesystem path or a
            file-like object. A string is treated as a URL when it is a
            valid URI-reference and as a path otherwise.
        etree: Explicitly provide an lxml.etree Element as the source
        url: Explicitly provide a URL as the source
        path: Explicitly provide a path as the source
        file: Explicitly provide a file-like as the source
        base_uri: A URI to override the base URI of the grammar with.
            Useful when the source doesn't have a sensible base URI, e.g.
            passing sys.stdin as a file.
        create_validator: If True, an lxml RelaxNG validator is created
            from the loaded XML document and returned. Otherwise the
            loaded XML is returned.
    Returns:
        A ``lxml.etree.RelaxNG`` validator from the fully loaded and
        inlined XML, or the XML itself, depending on the
        ``create_validator`` argument.
    Raises:
        ValueError: If the number of source arguments is not exactly one,
            if ``src`` is of an unrecognised kind, or if ``url`` or
            ``base_uri`` is not a valid URI-reference.
        RelaxngInlineError: (or subclass) is raised if the schema can't be
            loaded.
    """
    candidates = (src, etree, url, path, file)
    arg_count = sum(candidate is not None for candidate in candidates)
    if arg_count != 1:
        raise ValueError(
            "A single argument must be provided from src, "
            "etree, url, path or file. got {0:d}".format(arg_count))

    # Work out which kind of source a generic src argument is.
    if src is not None:
        if _etree.iselement(src):
            # lxml.etree Element
            etree = src
        elif hasattr(src, "getroot"):
            # lxml.etree ElementTree
            etree = src.getroot()
        elif isinstance(src, six.string_types):
            if uri.is_uri_reference(src):
                url = src
            else:
                path = src
        elif hasattr(src, "read"):
            file = src
        else:
            raise ValueError(
                "Don't know how to use src: {0!r}".format(src))

    # Recorded before any parsing/dereferencing below populates etree.
    grammar_provided_directly = etree is not None

    if path is not None:
        assert url is None
        assert etree is None
        url = urlhandlers.file.makeurl(path)

    if file is not None:
        assert etree is None
        # Note that the file.name attr is purposefully ignored as it's not
        # possible in the general case to know whether it's a filename/path
        # or some other indicator like <stdin> or a file descriptor number.
        # base_uri can be used to safely provide a base URI.
        etree = self.parse_grammar_xml(file.read(), None)

    context = InlineContext()

    if url is not None:
        assert etree is None
        if not uri.is_uri_reference(url):
            raise ValueError(
                "url was not a valid URL-reference: {0}".format(url))
        # IMPORTANT: resolving the input URL against the default base URI
        # is what allows the url to be a relative URI like foo/bar.rng
        # and still get handled by the filesystem handler, which requires
        # a file: URI scheme. Note also that if url is already absolute
        # with its own scheme then the default base is ignored in the
        # resolution process.
        absolute_url = uri.resolve(self.default_base_uri, url)
        etree = self.dereference_url(absolute_url, context)

    assert etree is not None
    assert _etree.iselement(etree)

    if base_uri is not None:
        if not uri.is_uri_reference(base_uri):
            raise ValueError(
                "base_uri is not a valid URI-reference: {0}".format(
                    base_uri))
        etree.getroottree().docinfo.URL = base_uri

    if grammar_provided_directly:
        # The XML to inline was directly provided, so we'll need to
        # validate it:
        self.validate_grammar_xml(etree)

    # Note that we don't need to validate that the element has a base
    # URI, as if it only includes absolute URLs it's not needed.

    pending_insertions = self._inline(etree, context)
    schema = self.postprocess(pending_insertions.perform_insertions())

    return self.create_validator(schema) if create_validator is True \
        else schema
def test_resolve_reference_must_be_uri_ref(ref, raises):
    """uri.resolve() raises ValueError for ``ref`` exactly when expected."""
    base = "x:/foo"
    if raises is not True:
        # No error expected: the call simply has to complete.
        uri.resolve(base, ref)
        return

    with pytest.raises(ValueError):
        uri.resolve(base, ref)
def test_resolve_base_must_be_uri(base, raises):
    """uri.resolve() raises ValueError for ``base`` exactly when expected."""
    empty_reference = ""
    if raises is not True:
        # No error expected: the call simply has to complete.
        uri.resolve(base, empty_reference)
        return

    with pytest.raises(ValueError):
        uri.resolve(base, empty_reference)
def test_strict_resolve(base, reference, target, strict):
    """Resolving with the given ``strict`` flag yields ``target``."""
    resolved = uri.resolve(base, reference, strict=strict)
    assert resolved == target
def test_resolve(base, reference, target):
    """Resolving ``reference`` against ``base`` yields ``target``."""
    resolved = uri.resolve(base, reference)
    assert resolved == target