def test_parsestringurl():
    """Base URLs should end up in the location info of the resulting XML tree."""
    # Plain bytes input: the test expects the URL "STRING" in the location
    plain = parse.tree(
        b"gurk",
        parse.SGMLOP(),
        parse.NS(),
        parse.Node(),
        validate=True,
    )
    assert str(plain[0].startloc.url) == "STRING"

    # Explicit URL passed to ``parse.String``: it must be reported unchanged
    source = parse.String(b"gurk", url="root:gurk.xmlxsc")
    named = parse.tree(source, parse.SGMLOP(), parse.NS(), parse.Node())
    assert str(named[0].startloc.url) == "root:gurk.xmlxsc"
def test_url(recwarn):
    """URLs in processing instructions and attributes are output relative to ``base``.

    The ``recwarn`` argument silences the ``RequiredAttrMissingWarning``.
    """
    target = "root:about/us.html"

    # ``<?url ...?>`` processing instruction, parsed with sgmlop
    procinst = parse.tree(
        b"<?url root:images/gurk.gif?>",
        parse.SGMLOP(),
        parse.NS(html),
        parse.Node(),
    )
    assert procinst.bytes(base=target) == b"../images/gurk.gif"

    # URL attribute of an element, parsed with expat
    image = parse.tree(
        b'<img src="root:images/gurk.gif"/>',
        parse.Expat(),
        parse.NS(html),
        parse.Node(),
    )
    assert image.bytes(base=target) == b'<img src="../images/gurk.gif" />'
def check(input, output):
    """Parse ``input`` and compare the ``title`` attribute of the first ``a`` node.

    ``input`` is handed to ``parse.tree`` as is; ``output`` is the expected
    string value of the attribute.
    """
    tree = parse.tree(
        input,
        parse.SGMLOP(),
        parse.NS(a),
        parse.Node(),
        validate=True,
    )
    first = tree.walknodes(a)[0]
    assert str(first.attrs.title) == output
def check(input, output):
    """Embed ``input`` in an ``a`` element and compare the parsed text with ``output``.

    ``input`` is used both as the element content and as the value of the
    ``title`` attribute; both must stringify to ``output`` after parsing.
    """
    source = f'<a title="{input}">{input}</a>'.encode("utf-8")
    tree = parse.tree(
        source,
        parse.SGMLOP(),
        parse.NS(a.xmlns),
        parse.Node(pool=xsc.Pool(a, bar, foo, chars)),
        validate=True,
    )
    first = tree.walknodes(a)[0]
    assert str(first) == output
    assert str(first.attrs.title) == output
def test_parselocationsgmlop():
    """Check the location info attached to nodes parsed with sgmlop."""
    # sgmlop doesn't provide any location info, so check only the URL.
    #
    # The markup has to stay pure ASCII: a ``bytes`` literal may not contain
    # the raw U+0666 character, so it is written as a character reference,
    # and the ampersand is escaped so that the input is well-formed.
    node = parse.tree(
        b"<z>gurk&amp;hurz*hinz&#x666;hunz</z>",
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )

    # One ``z`` element containing exactly one text node
    assert len(node) == 1
    assert len(node[0]) == 1

    text = node[0][0]
    assert str(text.startloc.url) == "STRING"
    assert text.startloc.line is None
    assert text.startloc.col is None
def test_sgmlop_no_multiple_text_events():
    """Test that we don't get consecutive text events with sgmlop."""
    # NOTE(review): the bare ``&`` characters may originally have been
    # ``&amp;`` entity references -- confirm against the upstream test.
    events = parse.events(
        parse.Iter(b"<a>gurk & hurz & hinz & kunz</a>"),
        parse.SGMLOP(),
    )

    # The complete event stream, in order; the text must arrive as ONE event
    expected = [
        ("url", url.URL("ITER")),
        ("enterstarttag", "a"),
        ("leavestarttag", "a"),
        ("text", "gurk & hurz & hinz & kunz"),
        ("endtag", "a"),
    ]
    for event in expected:
        assert next(events) == event

    # Nothing may follow the end tag
    with pytest.raises(StopIteration):
        next(events)
def xsc2txt(instream, outstream, title, width):
    """Convert the XIST ``doc`` markup read from ``instream`` to plain text.

    The parsed content is wrapped in a ``doc.section`` inside an HTML
    skeleton, converted, and written to ``outstream`` via ``html.astext``
    using ``width``. ``title`` is the section title; ``None`` means no title.
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )
    heading = xsc.Null if title is None else doc.title(title)
    page = html.html(html.body(doc.section(heading, content)))
    converted = page.conv()
    outstream.write(html.astext(converted, width=width))
def test_multipleparsecalls():
    """A parser instance must be reusable, even after failed parse calls."""

    def check(parser):
        # Feed broken input a few times; whether (and how) it fails is
        # irrelevant here, so any exception is deliberately ignored.
        for _ in range(3):
            try:
                parse.tree(b"<>gurk", parser, parse.NS(html), parse.Node(), validate=True)
            except Exception:
                pass
        # Afterwards the very same parser must still handle valid input
        for _ in range(3):
            tree = parse.tree(b"<a>gurk</a>", parser, parse.NS(html), parse.Node())
            assert tree.string() == "<a>gurk</a>"

    # A Parser instance should be able to parse multiple XML sources, even
    # when some of the parse calls fail
    for factory in (parse.SGMLOP, parse.Expat):
        check(factory())
def getdoc(thing, format):
    """Return the docstring of ``thing`` as an XIST node.

    ``format`` names the docstring format (compared case-insensitively):

    ``"plaintext"``
        The text is returned as an ``xsc.Text`` node.
    ``"restructuredtext"``
        The text is parsed with ``rest.fromstring`` and converted to ``doc``.
    ``"xist"``
        The text is parsed as XIST ``doc`` markup; ``pyref`` elements inside
        it get their missing ``module``/``class_``/``method`` attributes
        filled in from ``thing``.

    ``xsc.Null`` is returned when ``thing`` has no docstring. Any other
    ``format`` raises ``ValueError``.
    """
    if thing.__doc__ is None:
        return xsc.Null

    # Remove the common indentation, then drop empty lines at both ends
    rows = textwrap.dedent(thing.__doc__).split("\n")
    first = 0
    last = len(rows)
    while first < last and not rows[first]:
        first += 1
    while last > first and not rows[last - 1]:
        last -= 1
    text = "\n".join(rows[first:last])

    # Build the base that identifies where the docstring came from
    modulename = _getmodulename(thing)
    if inspect.ismethod(thing):
        # NOTE(review): for a bound method ``thing.__class__`` is the method
        # type itself, so this presumably yields "method" rather than the
        # owning class -- confirm whether that is intended.
        base = f"METHOD-DOCSTRING({modulename}.{thing.__class__.__name__}.{thing.__qualname__})"
    elif isinstance(thing, property):
        base = f"PROPERTY-DOCSTRING({modulename}.{thing})"
    elif inspect.isfunction(thing):
        base = f"FUNCTION-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.isclass(thing):
        base = f"CLASS-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.ismodule(thing):
        base = f"MODULE-DOCSTRING({modulename})"
    else:
        base = "DOCSTRING"

    kind = format.lower()

    if kind == "plaintext":
        return xsc.Text(text)

    if kind == "restructuredtext":
        from ll.xist.ns import rest, doc
        return rest.fromstring(text, base=base).conv(target=doc)

    if kind != "xist":
        raise ValueError(f"unsupported __docformat__ {format!r}")

    from ll.xist.ns import doc
    node = parse.tree(
        parse.String(text),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool(), base=base),
    )
    # optimization: one paragraph docstrings don't need a <p> element.
    if not node[p]:
        node = p(node)

    # Complete the ``pyref`` elements with whatever ``thing`` tells us
    if inspect.ismethod(thing):
        # Use the original method instead of the decorator
        unwrapped = thing
        while hasattr(unwrapped, "__wrapped__"):
            unwrapped = unwrapped.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(unwrapped)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__self__.__class__.__name__
            if "method" not in ref.attrs:
                ref["method"] = thing.__name__
    elif inspect.isfunction(thing):
        # Use the original function instead of the decorator
        unwrapped = thing
        while hasattr(unwrapped, "__wrapped__"):
            unwrapped = unwrapped.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(unwrapped)
    elif inspect.isclass(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(thing)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__name__
    elif inspect.ismodule(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = thing.__name__
    return node
def test_frag():
    """Parsing a fragment with several top-level nodes keeps all of them."""
    tree = parse.tree(
        b"das ist <b>klaus</b>. das ist <b>erich</b>",
        parse.SGMLOP(),
        parse.NS(html),
        parse.Node(),
    )
    # Collect the text of every ``b`` element selected by ``tree // html.b``
    bold = tree.walknodes(tree // html.b)
    assert "".join(str(item) for item in bold) == "klauserich"