def test_parserequiredattrs(recwarn):
    """Check that validated parsing warns about a missing required attribute.

    Inside a temporary pool an element class with a required ``required``
    attribute is declared. Parsing with the attribute present must work,
    parsing without it must record a ``RequiredAttrMissingWarning``. After
    the pool has been left, the same document is expected to come back as a
    plain ``xsc.Element`` carrying the right name and namespace.
    """
    xmlns = "http://www.example.com/required"

    def parsed(source):
        # Fresh pipeline objects for every run, exactly one document per call
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(xmlns),
            parse.Node(),
            validate=True,
        )

    # Parser should complain about required attributes that are missing
    with xsc.Pool():

        class Test(xsc.Element):
            # The literal is repeated on purpose: a class body that assigns
            # ``xmlns`` cannot read the enclosing function's ``xmlns``.
            xmlns = "http://www.example.com/required"

            class Attrs(xsc.Element.Attrs):
                class required(xsc.TextAttr):
                    required = True

        tree = parsed(b'<Test required="foo"/>')
        assert str(tree[0]["required"]) == "foo"

        parsed(b'<Test/>')
        # ``pop`` fails the test if no warning of that class was recorded
        recwarn.pop(xsc.RequiredAttrMissingWarning)

    tree = parsed(b'<Test required="foo"/>')
    element = tree[0]
    assert element.__class__ is xsc.Element
    assert element.xmlname == "Test"
    assert element.xmlns == xmlns
def test_parsevalueattrs(recwarn):
    """Check that validated parsing warns about attribute values outside ``values``.

    An element with a ``withvalues`` attribute restricted to ``"foo"`` and
    ``"bar"`` is declared in a temporary pool. ``"bar"`` must parse fine,
    ``"baz"`` must record an ``IllegalAttrValueWarning``.
    """
    xmlns = "http://www.example.com/required2"

    def parsed(source):
        # Fresh pipeline objects for every run
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(xmlns),
            parse.Node(),
            validate=True,
        )

    # Parser should complain about attributes with illegal values, when a set of values is specified
    with xsc.Pool():

        class Test(xsc.Element):
            # Literal repeated on purpose: the class body assigns ``xmlns``
            # itself and therefore cannot read the function-level variable.
            xmlns = "http://www.example.com/required2"

            class Attrs(xsc.Element.Attrs):
                class withvalues(xsc.TextAttr):
                    values = ("foo", "bar")

        tree = parsed(b'<Test withvalues="bar"/>')
        assert str(tree[0]["withvalues"]) == "bar"

        parsed(b'<Test withvalues="baz"/>')
        # ``pop`` fails the test if no warning of that class was recorded
        recwarn.pop(xsc.IllegalAttrValueWarning)
Esempio n. 3
0
def test_url(recwarn):
    """Check that ``root:`` URLs are output relative to the ``base`` passed to ``bytes``.

    Covers both a ``url`` processing instruction and the ``src`` attribute
    of an ``img`` element.
    """
    # The ``recwarn`` argument silences the ``RequiredAttrMissingWarning``
    procinst_tree = parse.tree(
        b"<?url root:images/gurk.gif?>",
        parse.SGMLOP(),
        parse.NS(html),
        parse.Node(),
    )
    rendered = procinst_tree.bytes(base="root:about/us.html")
    assert rendered == b"../images/gurk.gif"

    img_tree = parse.tree(
        b'<img src="root:images/gurk.gif"/>',
        parse.Expat(),
        parse.NS(html),
        parse.Node(),
    )
    rendered = img_tree.bytes(base="root:about/us.html")
    assert rendered == b'<img src="../images/gurk.gif" />'
def test_parsestringurl():
    """Base URLs should end up in the location info of the resulting XML tree.

    Raw bytes are expected to report ``"STRING"`` as their URL, while a
    ``parse.String`` source with an explicit ``url`` reports that URL.
    """
    anonymous = parse.tree(
        b"gurk",
        parse.SGMLOP(),
        parse.NS(),
        parse.Node(),
        validate=True,
    )
    assert str(anonymous[0].startloc.url) == "STRING"

    source = parse.String(b"gurk", url="root:gurk.xmlxsc")
    named = parse.tree(source, parse.SGMLOP(), parse.NS(), parse.Node())
    assert str(named[0].startloc.url) == "root:gurk.xmlxsc"
 def check(parser):
     """Check that ``parser`` can be reused after being fed broken input.

     Three rounds: each round first pushes the malformed ``<>gurk`` through
     the parser (any exception is ignored), then parses a well-formed
     document three times and compares its serialization.
     """
     for _round in range(3):
         try:
             parse.tree(
                 b"<>gurk",
                 parser,
                 parse.NS(html),
                 parse.Node(),
                 validate=True,
             )
         except Exception:
             # Deliberately swallowed: only the parser's state afterwards matters
             pass
         for _repeat in range(3):
             tree = parse.tree(
                 b"<a>gurk</a>",
                 parser,
                 parse.NS(html),
                 parse.Node(),
             )
             assert tree.string() == "<a>gurk</a>"
def test_parseemptyattribute():
    """Check that an attribute with an empty value is still present after parsing."""
    tree = parse.tree(
        b"<a target=''/>",
        parse.Expat(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html)),
        validate=True,
    )
    anchor = tree[0]
    assert "target" in anchor.attrs
Esempio n. 7
0
	def printone(u):
		"""
		Parse :obj:`u` with the Tidy parser and print the pretty-printed tree.

		:obj:`u` is read via ``parse.URL`` if it is a ``url.URL`` and via
		``parse.Stream`` otherwise. If ``args.compact`` (taken from the enclosing
		scope) is true, the tree is normalized and compacted first.
		"""
		if isinstance(u, url.URL):
			source = parse.URL(u)
		else:
			source = parse.Stream(u)
		tree = parse.tree(
			source,
			parse.Tidy(),
			parse.NS(html),
			parse.Node(base="", pool=xsc.Pool(html, xml)),
		)
		if args.compact:
			tree = tree.normalized().compacted()
		output = tree.pretty().string(encoding=sys.stdout.encoding)
		print(output)
def test_base():
    """Check that a relative ``href`` is resolved against the source's URL."""
    # Second positional argument of ``parse.String`` -- presumably the URL
    # of the source (verify against the ``parse.String`` signature).
    source = parse.String(
        b'<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>',
        'http://www.gurk.de/',
    )
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(html)),
        validate=True,
    )
    href = tree[0].attrs.href
    assert str(href) == "http://www.gurk.de/gurk.html"
 def check(input, output):
     """Parse ``input`` with sgmlop and compare the first ``a`` element's ``title`` with ``output``."""
     tree = parse.tree(
         input,
         parse.SGMLOP(),
         parse.NS(a),
         parse.Node(),
         validate=True,
     )
     anchor = tree.walknodes(a)[0]
     title = str(anchor.attrs.title)
     assert title == output
def test_htmlparse_base():
    """Check that the Tidy pipeline resolves ``href`` against the ``base`` given to ``parse.Node``."""
    tree = parse.tree(
        b"<a href='gurk.gif'/>",
        parse.Tidy(),
        parse.NS(html),
        parse.Node(base="hurz/index.html"),
        validate=True,
    )
    # Pick the first ``a`` element found in the parsed tree
    anchor = tree.walknodes(html.a)[0]
    assert str(anchor.attrs.href) == "hurz/gurk.gif"
 def check(input, output):
     """Embed ``input`` as both content and ``title`` of an ``a`` element and compare with ``output``.

     The markup is UTF-8 encoded and parsed with sgmlop; both the element's
     string value and its ``title`` attribute must equal ``output``.
     """
     tree = parse.tree(
         f'<a title="{input}">{input}</a>'.encode("utf-8"),
         parse.SGMLOP(),
         parse.NS(a.xmlns),
         parse.Node(pool=xsc.Pool(a, bar, foo, chars)),
         validate=True,
     )
     anchor = tree.walknodes(a)[0]
     assert str(anchor) == output
     assert str(anchor.attrs.title) == output
def test_expat_doctype():
    """Check how the Expat parser reports doctype declarations.

    Without ``doctype=True`` no ``xsc.DocType`` node is expected as the
    first node. With it, the content must match for public, bare, system and
    internal-subset doctypes.
    """

    def first_node(source, parser):
        # Run ``source`` through a validating pipeline and return the first node
        tree = parse.tree(
            source,
            parser,
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    head = first_node(
        b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
        parse.Expat(),
    )
    assert not isinstance(head, xsc.DocType)

    head = first_node(
        b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
        parse.Expat(doctype=True),
    )
    assert isinstance(head, xsc.DocType)
    assert head.content == html.DocTypeXHTML11().content

    head = first_node(b'<!DOCTYPE html><a/>', parse.Expat(doctype=True))
    assert isinstance(head, xsc.DocType)
    assert head.content == "html"

    head = first_node(
        b'<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
        parse.Expat(doctype=True),
    )
    assert isinstance(head, xsc.DocType)
    assert head.content == 'html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'

    head = first_node(
        b'<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>',
        parse.Expat(doctype=True),
    )
    assert isinstance(head, xsc.DocType)
    assert head.content == 'a'  # Internal subset gets dropped
def test_parse_tidy_attrs():
    """Check that the Tidy pipeline keeps plain, ``xml:`` and ``xlink`` attributes apart."""
    tree = parse.tree(
        b"<a xmlns:xl='http://www.w3.org/1999/xlink' xml:lang='de' xl:href='gurk.gif' href='gurk.gif'/>",
        parse.Tidy(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html, xml, xlink)),
        validate=True,
    )
    attrs = tree.walknodes(html.a)[0].attrs
    assert str(attrs["href"]) == "gurk.gif"
    assert str(attrs[xml.Attrs.lang]) == "de"
    assert str(attrs[xlink.Attrs.href]) == "gurk.gif"
def test_plain_entity():
    """Check that an entity unknown to an empty pool becomes a plain ``xsc.Entity``.

    Two ``UndeclaredNodeWarning`` warnings are expected to be recorded
    while parsing.
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'>&hurz;</a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        entity = tree[0][0]

    assert entity.__class__ is xsc.Entity
    assert entity.xmlname == "hurz"

    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_parselocationsgmlop():
    """Check the location info produced by the sgmlop pipeline.

    The text node's URL must be ``"STRING"``; line and column must be ``None``.
    """
    # sgmlop doesn't provide any location info, so check only the URL
    tree = parse.tree(
        b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>",
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )
    assert len(tree) == 1
    assert len(tree[0]) == 1
    location = tree[0][0].startloc
    assert str(location.url) == "STRING"
    assert location.line is None
    assert location.col is None
def test_parselocationexpat():
    """Check that expat gets the location info right.

    The single text node must report URL ``"STRING"``, line ``0`` and
    column ``36``.
    """
    tree = parse.tree(
        b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>",
        parse.Expat(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )
    assert len(tree) == 1
    assert len(tree[0]) == 1
    location = tree[0][0].startloc
    assert str(location.url) == "STRING"
    assert location.line == 0
    assert location.col == 36  # expat reports the *end* of the text
def test_expat_xmldecl():
    """Check how the Expat parser reports XML declarations.

    Without ``xmldecl=True``, or when the document has no declaration, the
    first node must not be an ``xml.XML``. With it, the declaration's
    content must be reproduced with double-quoted pseudo-attributes.
    """

    def first_node(source, parser):
        # Run ``source`` through a validating pipeline and return the first node
        tree = parse.tree(
            source,
            parser,
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    head = first_node(
        b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>",
        parse.Expat(),
    )
    assert not isinstance(head, xml.XML)

    head = first_node(b"<a/>", parse.Expat(xmldecl=True))
    assert not isinstance(head, xml.XML)

    head = first_node(b"<?xml version='1.0'?><a/>", parse.Expat(xmldecl=True))
    assert isinstance(head, xml.XML)
    assert head.content == 'version="1.0"'

    head = first_node(
        b"<?xml version='1.0' encoding='utf-8'?><a/>",
        parse.Expat(xmldecl=True),
    )
    assert isinstance(head, xml.XML)
    assert head.content == 'version="1.0" encoding="utf-8"'

    head = first_node(
        b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>",
        parse.Expat(xmldecl=True),
    )
    assert isinstance(head, xml.XML)
    assert head.content == 'version="1.0" encoding="utf-8" standalone="yes"'
def test_parseurls():
    """Check proper URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes.

    Relative URLs in ``href`` and inside the ``style`` declaration must be
    resolved against the ``base`` given to ``parse.Node``.
    """
    tree = parse.tree(
        b'<a href="4.html" style="background-image: url(3.gif);"/>',
        parse.Expat(),
        parse.NS(html),
        parse.Node(base="root:1/2.html"),
        validate=True,
    )
    anchor = tree[0]

    assert str(anchor["style"]) == "background-image: url(root:1/3.gif)"
    assert anchor["style"].urls() == [url.URL("root:1/3.gif")]

    assert str(anchor["href"]) == "root:1/4.html"
    resolved = anchor["href"].forInput(root="gurk/hurz.html")
    assert resolved == url.URL("gurk/1/4.html")
def test_plain_element():
    """Check that an element unknown to an empty pool becomes a plain ``xsc.Element``.

    Exactly one ``UndeclaredNodeWarning`` is expected to be recorded.
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'/>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        element = tree[0]

    assert element.__class__ is xsc.Element
    assert element.xmlns == "gurk"
    assert element.xmlname == "a"

    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
def test_plain_procinst():
    """Check that a processing instruction unknown to an empty pool becomes a plain ``xsc.ProcInst``.

    Two ``UndeclaredNodeWarning`` warnings are expected to be recorded
    while parsing.
    """
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'><?hurz text?></a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        procinst = tree[0][0]

    assert procinst.__class__ is xsc.ProcInst
    assert procinst.xmlname == "hurz"
    assert procinst.content == "text"

    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_xmlns():
    """Check namespace resolution for several pool / parser combinations.

    Covers nested elements from different namespaces, same-named elements
    from two namespaces, a default namespace supplied via ``parse.NS`` and
    the fallback to plain ``xsc.Element`` (with an ``UndeclaredNodeWarning``)
    when the pool does not know the element.
    """
    # Elements from two namespaces nested in each other
    source = f"<z xmlns={doc.xmlns!r}><rb xmlns={ruby.xmlns!r}/><z/></z>".encode("utf-8")
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(doc, ruby)),
        validate=True,
    )
    assert tree[0].xmlns == doc.xmlns
    assert tree[0][0].xmlns == ruby.xmlns

    # Same element name in two different namespaces
    source = f"<a xmlns={html.xmlns!r}><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(html, ihtml)),
        validate=True,
    )
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Outer namespace comes from ``parse.NS``, but the pool only knows ``ihtml``
    source = f"<a><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            source,
            parse.Expat(),
            parse.NS(html),
            parse.Node(pool=xsc.Pool(ihtml)),
            validate=True,
        )
    outer = tree[0]
    assert outer.__class__ is xsc.Element
    assert outer.xmlname == "a"
    assert outer.xmlns == html.xmlns
    assert isinstance(outer[0], ihtml.a)
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)

    # Same document, but now the pool knows both namespaces
    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html, ihtml)),
        validate=True,
    )
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Pool built from a single element class
    source = f"<z xmlns={doc.xmlns!r}/>".encode("utf-8")
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(doc.z)),
        validate=True,
    )
    assert isinstance(tree[0], doc.z)

    # Empty pool: fall back to a plain element and warn
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            source,
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
    outer = tree[0]
    assert outer.__class__ is xsc.Element
    assert outer.xmlname == "z"
    assert outer.xmlns == doc.xmlns
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
Esempio n. 22
0
def xsc2txt(instream, outstream, title, width):
    """Convert a ``doc`` document read from ``instream`` to text written to ``outstream``.

    The input is parsed with sgmlop in the ``doc`` namespace, wrapped in
    ``html.html(html.body(doc.section(...)))``, converted with ``conv()``
    and rendered by ``html.astext``.

    ``title`` is the section title, or ``None`` for no title (``xsc.Null``
    is used instead). ``width`` is passed on to ``html.astext``.
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )

    heading = xsc.Null if title is None else doc.title(title)
    page = html.html(html.body(doc.section(heading, content)))
    converted = page.conv()

    outstream.write(html.astext(converted, width=width))
def test_parsingmethods():
    """Check that different source types all produce the same parsed text.

    The same document (containing non-BMP and non-ASCII characters) is fed
    in as bytes, as ``str`` via an encoder, byte by byte, through a stream
    with a one byte buffer, and as an ElementTree.
    """
    text = "abc\U00012345\u3042xyz"
    markup = f'<?xml version="1.0" encoding="utf-8"?><a title="{text}">{text}</a>'
    raw = markup.encode("utf-8")

    def verify(*pipeline):
        # Both the element content and its ``title`` must round-trip unchanged
        tree = parse.tree(*pipeline, validate=True)
        anchor = tree.walknodes(a)[0]
        assert str(anchor) == text
        assert str(anchor["title"]) == text

    pool = xsc.Pool(a)

    verify(raw, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    verify(
        markup,
        parse.Encoder(encoding="utf-8"),
        parse.Expat(),
        parse.NS(a.xmlns),
        parse.Node(pool),
    )
    # parse byte by byte
    verify(parse.Iter(raw), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    verify(
        parse.Stream(io.BytesIO(raw), bufsize=1),
        parse.Expat(),
        parse.NS(a.xmlns),
        parse.Node(pool),
    )
    verify(
        parse.ETree(cElementTree.fromstring(raw), defaultxmlns=a.xmlns),
        parse.Node(pool),
    )
def test_itertree_large():
    """Iterate over 1000 ``li`` elements of a generated document with ``itertree``.

    Every selected ``li`` must contain its own running number.
    """

    def chunks():
        # Produce the document piece by piece
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        for number in range(1000):
            yield f"<li>{number}</li>".encode("utf-8")
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(chunks()),
        parse.Expat(ns=True),
        parse.Node(),
        selector=html.li,
        validate=True,
    )
    for (expected, cursor) in enumerate(cursors):
        assert int(str(cursor.node)) == expected
        # Empty the second-to-last path entry (presumably the enclosing
        # ``ul``) so already processed items are not kept around
        cursor.path[-2].content.clear()
def test_itertree_skip():
    """Check that setting ``entercontent = False`` on the ``ul`` cursor skips its children.

    No ``li`` element may ever be yielded by ``itertree``.
    """

    def chunks():
        # Produce the document piece by piece
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        for number in range(10):
            yield f"<li>{number}</li>".encode("utf-8")
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(chunks()),
        parse.Expat(ns=True),
        parse.Node(),
        enterelementnode=True,
        validate=True,
    )
    for cursor in cursors:
        current = cursor.node
        if isinstance(current, html.ul):
            cursor.entercontent = False
        assert not isinstance(current, html.li)
Esempio n. 26
0
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
	"""
	Parse a TLD document and return its ``taglib`` element converted by ``asxnd``.

	:obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``.

	:obj:`encoding` is passed to the Expat parser. For ``str`` input it is also
	used to encode the text to bytes, defaulting to ``"utf-8"`` when ``None``.
	For any other input, ``None`` is passed on unchanged.

	:obj:`shareattrs` selects attribute sharing on the result: ``"dupes"`` calls
	``shareattrs(False)``, ``"all"`` calls ``shareattrs(True)``, and any other
	value leaves the result untouched.

	:obj:`model` is passed on to ``asxnd``.
	"""
	# The caller's ``encoding`` used to be reset to ``None`` here
	# unconditionally, so the parameter never had any effect.
	if isinstance(stream, str):
		# The string is encoded with the same encoding that is announced to
		# the parser, so the two always agree.
		if encoding is None:
			encoding = "utf-8"
		stream = stream.encode(encoding)
	node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

	# get and convert the taglib object
	xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

	if shareattrs == "dupes":
		xnd.shareattrs(False)
	elif shareattrs == "all":
		xnd.shareattrs(True)
	return xnd
def test_nsparse():
    """A prepopulated prefix mapping and xmlns attributes should work together.

    The outermost ``x:a`` relies on the prefix given to ``parse.NS``, while
    the nested elements rebind the ``x`` prefix themselves.
    """
    source = b"""
		<x:a>
			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
				<x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>
			</x:a>
		</x:a>
	"""
    expected = doc.a(html.a(doc.a("gurk")))
    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(x=doc),
        parse.Node(),
        validate=True,
    )
    # get rid of the Frag and whitespace
    outermost = tree.walknodes(xsc.Element)[0]
    assert outermost.compacted() == expected
Esempio n. 28
0
def getdoc(thing, format):
    """Return the docstring of ``thing`` as a node, interpreted according to ``format``.

    ``format`` is compared case-insensitively and must be ``"plaintext"``,
    ``"restructuredtext"`` or ``"xist"``; anything else raises
    ``ValueError``. If ``thing`` has no docstring, ``xsc.Null`` is returned.

    For ``"xist"`` the docstring is parsed as ``doc`` markup, and ``pyref``
    elements lacking a ``module`` attribute get ``module`` (and, depending
    on the kind of ``thing``, ``class_`` and ``method``) filled in.
    """
    raw = thing.__doc__
    if raw is None:
        return xsc.Null

    # Remove indentation
    lines = textwrap.dedent(raw).split("\n")

    # Remove empty lines at both ends
    while lines and not lines[0]:
        lines.pop(0)
    while lines and not lines[-1]:
        lines.pop()

    text = "\n".join(lines)

    # Pseudo URL describing where the docstring came from; used as ``base``
    modulename = _getmodulename(thing)
    if inspect.ismethod(thing):
        base = f"METHOD-DOCSTRING({modulename}.{thing.__class__.__name__}.{thing.__qualname__})"
    elif isinstance(thing, property):
        base = f"PROPERTY-DOCSTRING({modulename}.{thing})"
    elif inspect.isfunction(thing):
        base = f"FUNCTION-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.isclass(thing):
        base = f"CLASS-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.ismodule(thing):
        base = f"MODULE-DOCSTRING({modulename})"
    else:
        base = "DOCSTRING"

    kind = format.lower()
    if kind == "plaintext":
        return xsc.Text(text)
    if kind == "restructuredtext":
        from ll.xist.ns import rest, doc
        return rest.fromstring(text, base=base).conv(target=doc)
    if kind != "xist":
        raise ValueError(f"unsupported __docformat__ {format!r}")

    from ll.xist.ns import doc
    node = parse.tree(
        parse.String(text),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool(), base=base),
    )
    # No ``p`` children: wrap everything in one ``p``, so that one paragraph
    # docstrings don't need a <p> element.
    if not node[p]:
        node = p(node)

    if inspect.ismethod(thing):
        # Use the original method instead of the decorator
        realthing = thing
        while hasattr(realthing, "__wrapped__"):
            realthing = realthing.__wrapped__
        for ref in node.walknodes(pyref):
            # ``class_``/``method`` are only filled in when ``module`` was
            # missing, too
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(realthing)
            if "class_" in ref.attrs:
                continue
            ref["class_"] = thing.__self__.__class__.__name__
            if "method" not in ref.attrs:
                ref["method"] = thing.__name__
    elif inspect.isfunction(thing):
        # Use the original function instead of the decorator
        unwrapped = thing
        while hasattr(unwrapped, "__wrapped__"):
            unwrapped = unwrapped.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(unwrapped)
    elif inspect.isclass(thing):
        for ref in node.walknodes(pyref):
            # ``class_`` is only filled in when ``module`` was missing, too
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(thing)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__name__
    elif inspect.ismodule(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = thing.__name__
    return node
def test_frag():
    """Check that ``b`` elements directly inside a parsed fragment are found by ``walknodes``."""
    tree = parse.tree(
        b"das ist <b>klaus</b>. das ist <b>erich</b>",
        parse.SGMLOP(),
        parse.NS(html),
        parse.Node(),
    )
    bold_texts = [str(found) for found in tree.walknodes(tree // html.b)]
    assert "".join(bold_texts) == "klauserich"
def test_parse_tidy_empty():
    """Check that parsing empty input with Tidy gives a falsy (empty) result."""
    tree = parse.tree(
        b"",
        parse.Tidy(),
        parse.NS(),
        parse.Node(),
        validate=True,
    )
    assert not tree