コード例 #1
0
def test_parsevalueattrs(recwarn):
    """Check validation of attributes that declare a fixed set of values.

    Inside a ``with xsc.Pool():`` block a ``Test`` element is declared whose
    ``withvalues`` attribute lists ``"foo"`` and ``"bar"`` as its values.
    Parsing a listed value must give that value back; after parsing the
    unlisted value ``"baz"`` an ``IllegalAttrValueWarning`` is popped from
    ``recwarn``.
    """
    namespace = "http://www.example.com/required2"

    def parse_validated(source):
        # Every run gets fresh pipeline objects and has validation enabled.
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(namespace),
            parse.Node(),
            validate=True,
        )

    # Parser should complain about attributes with illegal values, when a set of values is specified
    with xsc.Pool():

        class Test(xsc.Element):
            xmlns = "http://www.example.com/required2"

            class Attrs(xsc.Element.Attrs):
                class withvalues(xsc.TextAttr):
                    values = ("foo", "bar")

        tree = parse_validated(b'<Test withvalues="bar"/>')
        assert str(tree[0]["withvalues"]) == "bar"

        parse_validated(b'<Test withvalues="baz"/>')
        # The recorded warning itself is not inspected, only fetched.
        recwarn.pop(xsc.IllegalAttrValueWarning)
コード例 #2
0
def test_parserequiredattrs(recwarn):
    """Check validation of attributes that are marked as required.

    Inside a ``with xsc.Pool():`` block a ``Test`` element with a required
    ``required`` attribute is declared.  Parsing with the attribute present
    must give its value back; after parsing without it a
    ``RequiredAttrMissingWarning`` is popped from ``recwarn``.  Once the
    ``with`` block has been left, the same source must produce a plain
    ``xsc.Element`` that still carries the element name and namespace.
    """
    namespace = "http://www.example.com/required"

    def parse_validated(source):
        # Every run gets fresh pipeline objects and has validation enabled.
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(namespace),
            parse.Node(),
            validate=True,
        )

    # Parser should complain about required attributes that are missing
    with xsc.Pool():

        class Test(xsc.Element):
            xmlns = "http://www.example.com/required"

            class Attrs(xsc.Element.Attrs):
                class required(xsc.TextAttr):
                    required = True

        tree = parse_validated(b'<Test required="foo"/>')
        assert str(tree[0]["required"]) == "foo"

        parse_validated(b'<Test/>')
        # The recorded warning itself is not inspected, only fetched.
        recwarn.pop(xsc.RequiredAttrMissingWarning)

    # Same document again, this time outside of the ``with`` block.
    tree = parse_validated(b'<Test required="foo"/>')
    element = tree[0]
    assert element.__class__ is xsc.Element
    assert element.xmlname == "Test"
    assert element.xmlns == namespace
コード例 #3
0
def test_url(recwarn):
	"""Check that ``root:`` URLs are written relative to the output base.

	Both a ``url`` processing instruction and an ``img`` element with a
	``src`` attribute are serialized with ``base="root:about/us.html"`` and
	must come out pointing at ``../images/gurk.gif``.
	"""
	# The ``recwarn`` argument silences the ``RequiredAttrMissingWarning``
	base = "root:about/us.html"

	procinst_tree = parse.tree(b"<?url root:images/gurk.gif?>", parse.SGMLOP(), parse.NS(html), parse.Node())
	assert procinst_tree.bytes(base=base) == b"../images/gurk.gif"

	image_tree = parse.tree(b'<img src="root:images/gurk.gif"/>', parse.Expat(), parse.NS(html), parse.Node())
	assert image_tree.bytes(base=base) == b'<img src="../images/gurk.gif" />'
コード例 #4
0
def test_parseemptyattribute():
    """An attribute given as an empty string must still show up in ``attrs``."""
    pipeline = (
        parse.Expat(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html)),
    )
    tree = parse.tree(b"<a target=''/>", *pipeline, validate=True)
    anchor = tree[0]
    assert "target" in anchor.attrs
コード例 #5
0
def test_base():
    """Check the ``href`` of a link parsed from a ``parse.String`` source.

    The source is created with ``'http://www.gurk.de/'`` as its second
    argument; the relative ``href="gurk.html"`` is expected to come back as
    ``http://www.gurk.de/gurk.html``.
    """
    source = parse.String(
        b'<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>',
        'http://www.gurk.de/',
    )
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(html)),
        validate=True,
    )
    link = tree[0]
    assert str(link.attrs.href) == "http://www.gurk.de/gurk.html"
コード例 #6
0
def test_expat_doctype():
    """Check how ``parse.Expat`` handles document type declarations.

    Without ``doctype=True`` the first parsed node must not be a
    ``xsc.DocType``.  With it, the first node must be a ``xsc.DocType``
    whose content matches the expectation for each kind of declaration
    (public identifier, bare name, system identifier, internal subset).
    """
    xhtml11 = b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>'

    def first_node(source, **expatargs):
        # Parse ``source`` in the HTML namespace and hand back the first node.
        tree = parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    # Default parser configuration: no doctype node.
    assert not isinstance(first_node(xhtml11), xsc.DocType)

    # Public identifier.
    doctype = first_node(xhtml11, doctype=True)
    assert isinstance(doctype, xsc.DocType)
    assert doctype.content == html.DocTypeXHTML11().content

    # Name only.
    doctype = first_node(b'<!DOCTYPE html><a/>', doctype=True)
    assert isinstance(doctype, xsc.DocType)
    assert doctype.content == "html"

    # System identifier.
    doctype = first_node(
        b'<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
        doctype=True,
    )
    assert isinstance(doctype, xsc.DocType)
    assert doctype.content == 'html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'

    # Internal subset.
    doctype = first_node(
        b'<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>',
        doctype=True,
    )
    assert isinstance(doctype, xsc.DocType)
    assert doctype.content == 'a'  # Internal subset gets dropped
コード例 #7
0
def test_expat_events_on_exception():
    """All events collected before a parse error must still be delivered.

    The source ``<x/>schrott`` has trailing text after the root element.
    The events for ``<x/>`` are expected first; only the following
    ``next()`` call must raise ``expat.ExpatError``.
    """
    # Test that all collected events are output before an exception is thrown
    events = parse.events(b"<x/>schrott", parse.Expat())
    expected_events = [
        ("url", url.URL("STRING")),
        ("position", (0, 0)),
        ("enterstarttag", "x"),
        ("leavestarttag", "x"),
        ("position", (0, 4)),
        ("endtag", "x"),
    ]
    for expected in expected_events:
        assert next(events) == expected
    with pytest.raises(expat.ExpatError):
        next(events)
コード例 #8
0
def test_plain_entity():
    """An entity reference parsed with an empty pool becomes a plain ``xsc.Entity``.

    Exactly two warnings are expected while parsing, both of them
    ``UndeclaredNodeWarning`` (or a subclass).
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'>&hurz;</a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        entity = tree[0][0]

    assert entity.__class__ is xsc.Entity
    assert entity.xmlname == "hurz"

    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
コード例 #9
0
def test_parselocationexpat():
    """Check the start location that is attached to a parsed text node.

    The text mixes literal characters, a named entity and two character
    references; it must end up as a single child node whose ``startloc``
    names the URL ``STRING``, line 0 and column 36.
    """
    # Check that expat gets the location info right
    tree = parse.tree(
        b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>",
        parse.Expat(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )
    assert len(tree) == 1
    element = tree[0]
    assert len(element) == 1
    location = element[0].startloc
    assert str(location.url) == "STRING"
    assert location.line == 0
    assert location.col == 36  # expat reports the *end* of the text
コード例 #10
0
 def check(encoding):
     """Serialize a small tree with ``encoding`` and parse it back.

     The tree contains a PHP processing instruction, an entity from the
     ``abbr`` namespace, a nested ``div`` with a ``class`` attribute and a
     non-ASCII character.  The reparsed tree must compare equal to the
     original one.
     """
     original = xsc.Frag(
         html.div(
             php.php("echo $foo"),
             abbr.html(),
             html.div("gurk", class_="hurz"),
             "\u3042",
         ))
     serialized = original.bytes(encoding=encoding)
     pipeline = (
         parse.Expat(),
         parse.NS(html),
         xsc.Pool(html, php, abbr),
     )
     reparsed = parse.tree(serialized, *pipeline)
     assert original == reparsed
コード例 #11
0
def test_expat_xmldecl():
    """Check how ``parse.Expat`` handles the XML declaration.

    Without ``xmldecl=True``, or when the source has no declaration, the
    first parsed node must not be an ``xml.XML``.  With the option and a
    declaration present, the first node must be an ``xml.XML`` whose
    content lists the pseudo-attributes with double quotes.
    """
    def first_node(source, **expatargs):
        # Parse ``source`` in the HTML namespace and hand back the first node.
        tree = parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    full_declaration = b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>"

    # Declaration present, but the parser is not asked to report it.
    assert not isinstance(first_node(full_declaration), xml.XML)

    # Parser is asked to report it, but there is no declaration.
    assert not isinstance(first_node(b"<a/>", xmldecl=True), xml.XML)

    # Version only.
    declaration = first_node(b"<?xml version='1.0'?><a/>", xmldecl=True)
    assert isinstance(declaration, xml.XML)
    assert declaration.content == 'version="1.0"'

    # Version and encoding.
    declaration = first_node(
        b"<?xml version='1.0' encoding='utf-8'?><a/>",
        xmldecl=True,
    )
    assert isinstance(declaration, xml.XML)
    assert declaration.content == 'version="1.0" encoding="utf-8"'

    # Version, encoding and standalone.
    declaration = first_node(full_declaration, xmldecl=True)
    assert isinstance(declaration, xml.XML)
    assert declaration.content == 'version="1.0" encoding="utf-8" standalone="yes"'
コード例 #12
0
def test_xmlns():
    """Check which classes and namespaces parsed elements end up with.

    Several combinations of namespace handling (``parse.Expat(ns=True)``
    versus ``parse.Expat()`` followed by ``parse.NS(html)``) and pool
    contents are parsed.  Elements covered by the pool must be instances of
    the matching class; elements that are not covered must be plain
    ``xsc.Element`` objects that keep name and namespace, and must produce
    exactly one ``UndeclaredNodeWarning``.
    """
    def parse_nsaware(source, *poolcontent):
        # Namespaces are resolved by the expat parser itself.
        return parse.tree(
            source,
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool(*poolcontent)),
            validate=True,
        )

    def parse_default_html(source, *poolcontent):
        # Namespaces are resolved by ``parse.NS`` with HTML as the default.
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(html),
            parse.Node(pool=xsc.Pool(*poolcontent)),
            validate=True,
        )

    # Nested elements from two different namespaces.
    source = f"<z xmlns={doc.xmlns!r}><rb xmlns={ruby.xmlns!r}/><z/></z>".encode("utf-8")
    tree = parse_nsaware(source, doc, ruby)
    assert tree[0].xmlns == doc.xmlns
    assert tree[0][0].xmlns == ruby.xmlns

    # Same element name in two namespaces.
    source = f"<a xmlns={html.xmlns!r}><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    tree = parse_nsaware(source, html, ihtml)
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Outer element relies on the default namespace, pool only knows ``ihtml``.
    source = f"<a><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    with warnings.catch_warnings(record=True) as caught:
        tree = parse_default_html(source, ihtml)
    outer = tree[0]
    assert outer.__class__ is xsc.Element
    assert outer.xmlname == "a"
    assert outer.xmlns == html.xmlns
    assert isinstance(outer[0], ihtml.a)
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)

    # Same source, pool knows both namespaces.
    tree = parse_default_html(source, html, ihtml)
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Pool built from a single element class.
    source = f"<z xmlns={doc.xmlns!r}/>".encode("utf-8")
    tree = parse_nsaware(source, doc.z)
    assert isinstance(tree[0], doc.z)

    # Empty pool.
    with warnings.catch_warnings(record=True) as caught:
        tree = parse_nsaware(source)
    element = tree[0]
    assert element.__class__ is xsc.Element
    assert element.xmlname == "z"
    assert element.xmlns == doc.xmlns
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
コード例 #13
0
def test_parseurls():
    """Check URL handling for ``href`` and ``style`` attributes.

    The node builder gets ``base="root:1/2.html"``.  The relative URLs in
    both attributes are expected to come back as ``root:1/...`` URLs, and
    ``forInput(root="gurk/hurz.html")`` on the ``href`` attribute is
    expected to give ``gurk/1/4.html``.
    """
    # Check proper URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes
    tree = parse.tree(
        b'<a href="4.html" style="background-image: url(3.gif);"/>',
        parse.Expat(),
        parse.NS(html),
        parse.Node(base="root:1/2.html"),
        validate=True,
    )
    link = tree[0]

    assert str(link["style"]) == "background-image: url(root:1/3.gif)"
    assert link["style"].urls() == [url.URL("root:1/3.gif")]

    assert str(link["href"]) == "root:1/4.html"
    resolved = link["href"].forInput(root="gurk/hurz.html")
    assert resolved == url.URL("gurk/1/4.html")
コード例 #14
0
def test_plain_element():
    """An element parsed with an empty pool becomes a plain ``xsc.Element``.

    The element must keep its name and namespace, and exactly one
    ``UndeclaredNodeWarning`` is expected while parsing.
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'/>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        element = tree[0]

    assert element.__class__ is xsc.Element
    assert element.xmlns == "gurk"
    assert element.xmlname == "a"

    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
コード例 #15
0
def test_plain_procinst():
    """A processing instruction parsed with an empty pool becomes a plain ``xsc.ProcInst``.

    Target name and content must be kept.  Exactly two warnings are
    expected while parsing, both of them ``UndeclaredNodeWarning`` (or a
    subclass).
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'><?hurz text?></a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        procinst = tree[0][0]

    assert procinst.__class__ is xsc.ProcInst
    assert procinst.xmlname == "hurz"
    assert procinst.content == "text"

    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
コード例 #16
0
def test_parsingmethods():
    """Parse the same document through several kinds of pipelines.

    The document holds a text with ASCII, BMP and non-BMP characters, both
    as the content and as the ``title`` attribute of an ``a`` element.
    Whatever source object feeds the pipeline (bytes, an encoded string, an
    iterator, a stream read with a buffer size of 1, or an ElementTree),
    the first ``a`` node found must reproduce the text in both places.
    """
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Run the pipeline and compare content and attribute of the first
        # ``a`` node with the original text.
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    check(b, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    check(s, parse.Encoder(encoding="utf-8"), parse.Expat(), parse.NS(a.xmlns),
          parse.Node(pool))
    check(parse.Iter(b), parse.Expat(), parse.NS(a.xmlns),
          parse.Node(pool))  # parse byte by byte
    check(parse.Stream(io.BytesIO(b), bufsize=1), parse.Expat(),
          parse.NS(a.xmlns), parse.Node(pool))
    check(parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns),
          parse.Node(pool))
コード例 #17
0
def test_itertree_large():
    """Iterate over the ``li`` elements of a list with 1000 entries.

    The source is fed to the parser piece by piece from a generator.  The
    ``li`` elements must arrive in document order, each containing its own
    index as text.
    """
    def chunks():
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{number}</li>".encode("utf-8") for number in range(1000))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(chunks()),
        parse.Expat(ns=True),
        parse.Node(),
        selector=html.li,
        validate=True,
    )
    expected = 0
    for cursor in cursors:
        assert int(str(cursor.node)) == expected
        # Empty the content of the second to last node in the path
        # (presumably the enclosing ``ul``) after each match.
        cursor.path[-2].content.clear()
        expected += 1
コード例 #18
0
def test_expat_no_multiple_text_events():
    """Text interrupted by entity references must arrive as one ``text`` event.

    The source is handed over via ``parse.Iter``.  After the listed events
    the event iterator must be exhausted.
    """
    # Test that we don't get consecutive text events with expat
    source = parse.Iter(b"<a>gurk &amp; hurz &amp; hinz &amp; kunz</a>")
    events = parse.events(source, parse.Expat())
    expected_events = [
        ("url", url.URL("ITER")),
        ("position", (0, 0)),
        ("enterstarttag", "a"),
        ("leavestarttag", "a"),
        ("position", (0, 4)),
        ("text", "gurk & hurz & hinz & kunz"),
        ("position", (0, 40)),
        ("endtag", "a"),
    ]
    for expected in expected_events:
        assert next(events) == expected
    with pytest.raises(StopIteration):
        next(events)
コード例 #19
0
def test_itertree_skip():
    """Setting ``entercontent = False`` on the ``ul`` must hide its ``li`` children.

    The tree is iterated with ``enterelementnode=True``.  As soon as the
    ``ul`` element shows up its cursor gets ``entercontent = False``; no
    ``li`` element may be produced by the iteration.
    """
    def chunks():
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{number}</li>".encode("utf-8") for number in range(10))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(chunks()),
        parse.Expat(ns=True),
        parse.Node(),
        enterelementnode=True,
        validate=True,
    )
    for cursor in cursors:
        if isinstance(cursor.node, html.ul):
            cursor.entercontent = False
        assert not isinstance(cursor.node, html.li)
コード例 #20
0
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
	"""Parse a TLD document and return the result of ``asxnd()`` for its taglib.

	:obj:`stream` is the source that is handed to ``parse.tree``; a ``str``
	is encoded to ``bytes`` first.

	:obj:`encoding` is passed on to ``parse.Expat``.  For a ``str`` source it
	is also used for encoding the string, with ``"utf-8"`` as the fallback
	when it is ``None``.

	:obj:`shareattrs` selects the argument for ``xnd.shareattrs()``:
	``"dupes"`` calls it with ``False``, ``"all"`` calls it with ``True``,
	any other value skips the call.

	:obj:`model` is passed on to ``asxnd()``.
	"""
	# :obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``
	if isinstance(stream, str):
		# Only fall back to UTF-8 when the caller didn't choose an encoding;
		# unconditionally resetting it would discard the argument.
		if encoding is None:
			encoding = "utf-8"
		stream = stream.encode(encoding)
	node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

	# get and convert the taglib object
	xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

	if shareattrs == "dupes":
		xnd.shareattrs(False)
	elif shareattrs == "all":
		xnd.shareattrs(True)
	return xnd
コード例 #21
0
def test_multipleparsecalls():
    """A parser object must stay usable across many parse calls.

    Each parser is used three times for a malformed source, and after each
    of those runs three times for a well-formed one, which must serialize
    back to ``<a>gurk</a>``.
    """
    def check(parser):
        for _attempt in range(3):
            try:
                parse.tree(
                    b"<>gurk",
                    parser,
                    parse.NS(html),
                    parse.Node(),
                    validate=True,
                )
            except Exception:
                # Whether the malformed source raises doesn't matter here;
                # only the following parse calls are checked.
                pass
            for _repeat in range(3):
                tree = parse.tree(b"<a>gurk</a>", parser, parse.NS(html), parse.Node())
                assert tree.string() == "<a>gurk</a>"

    # A Parser instance should be able to parse multiple XML sources, even when some of the parse calls fail
    check(parse.SGMLOP())
    check(parse.Expat())
コード例 #22
0
def test_nsparse():
    """Prefix mappings given to ``parse.NS`` and ``xmlns`` attributes must combine.

    The prefix ``x`` is mapped to ``doc`` up front, then redeclared twice
    by nested elements.  The parsed and compacted tree must equal
    ``doc.a(html.a(doc.a("gurk")))``.
    """
    # A prepopulated prefix mapping and xmlns attributes should work together
    source = b"""
		<x:a>
			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
				<x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>
			</x:a>
		</x:a>
	"""
    expected = doc.a(html.a(doc.a("gurk")))
    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(x=doc),
        parse.Node(),
        validate=True,
    )
    # get rid of the Frag and whitespace
    outermost = tree.walknodes(xsc.Element)[0]
    compacted = outermost.compacted()
    assert compacted == expected
コード例 #23
0
def test_comments():
    """CSS comments in a ``style`` element must not reach the applied styles.

    After ``css.applystylesheets()`` the ``style`` attribute of the first
    ``p`` element is expected to be exactly ``color: red;``.
    """
    source = b'<html><head><style type="text/css">/*nix*/ p{/*nix*/ color: red;}</style></head><body><p>gurk</p></body></html>'
    tree = parse.tree(source, parse.Expat(), parse.NS(html), parse.Node())
    css.applystylesheets(tree)
    paragraph = tree.walknodes(html.p)[0]
    assert str(paragraph.attrs.style) == "color: red;"
コード例 #24
0
    def convert(self, converter):
        """Parse the document named by the ``src`` attribute and convert it.

        The source is ``self["src"].forInput()`` wrapped in ``parse.URL``;
        the parsed tree's ``convert(converter)`` result is returned.
        """
        source = parse.URL(self["src"].forInput())
        tree = parse.tree(source, parse.Expat(ns=True), parse.Node())
        return tree.convert(converter)
コード例 #25
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# cython: language_level=3, always_allow_keywords=True

from ll.xist import xsc, parse
from ll.xist.ns import xml, html, meta

import qel_xmlns, rdf_xmlns, rdfs_xmlns, cc_xmlns, dc_xmlns

url = "http://www.amk.ca/quotations/python-quotes.xml"

if __name__ == "__main__":
    # Parse the document at ``url``, pick the first ``quotations`` element,
    # convert it and print the result.
    base = "root:python-quotes.html"
    pool = xsc.Pool(
        html,
        xml,
        qel_xmlns,
        rdf_xmlns,
        rdfs_xmlns,
        cc_xmlns,
        dc_xmlns,
    )
    tree = parse.tree(
        parse.URL(url),
        parse.Expat(ns=True),
        parse.Node(pool=pool, base=base),
        validate=False,
    )
    quotations = tree[qel_xmlns.quotations][0]
    converted = quotations.compacted().conv()
    output = converted.string(base=base, encoding="iso-8859-1", validate=False)
    print(output)
コード例 #26
0
        dvds = xsc.Frag(self[dvd]).sorted(key=namekey)
        lds = xsc.Frag(self[ld]).sorted(key=namekey)

        with xsc.build():
            with xsc.Frag() as e:
                +xml.XML()
                +html.DocTypeXHTML10transitional()
                with html.html():
                    with html.head():
                        +meta.contenttype()
                        +html.title("Media")
                        +meta.stylesheet(href="Media.css")
                    with htmlspecials.plainbody():
                        +html.h1("Media")
                        if lds:
                            +html.h2(len(lds), " LDs")
                            +html.ol(lds)
                        if dvds:
                            +html.h2(len(dvds), " DVDs")
                            +html.ol(dvds)
        return e.convert(converter)


if __name__ == "__main__":
    # Parse ``Media.xml``, convert the first ``media`` element and print it.
    # ``vars()`` has to be evaluated here at module level, so that the pool
    # receives the namespace of this module.
    node = parse.tree(
        parse.File("Media.xml"),
        parse.Expat(ns=True),
        xsc.Pool(vars(), chars, xml),
    )[media][0].conv()
    print(node.bytes(encoding="us-ascii"))
コード例 #27
0
			return str(node[name][0].content)

		dvds = xsc.Frag(self[dvd]).sorted(key=namekey)
		lds = xsc.Frag(self[ld]).sorted(key=namekey)

		with xsc.build():
			with xsc.Frag() as e:
				+xml.XML()
				+html.DocTypeXHTML10transitional()
				with html.html():
					with html.head():
						+meta.contenttype()
						+html.title("Media")
						+meta.stylesheet(href="Media.css")
					with htmlspecials.plainbody():
						+html.h1("Media")
						if lds:
							+html.h2(len(lds), " LDs")
							+html.ol(lds)
						if dvds:
							+html.h2(len(dvds), " DVDs")
							+html.ol(dvds)
		return e.convert(converter)


if __name__ == "__main__":
	# Parse ``Media.xml``, convert the first ``media`` element and print it.
	# ``vars()`` has to be evaluated here at module level, so that the pool
	# receives the namespace of this module.
	node = parse.tree(
		parse.File("Media.xml"),
		parse.Expat(ns=True),
		xsc.Pool(vars(), chars, xml),
	)[media][0].conv()
	print(node.bytes(encoding="us-ascii"))