def test_itersource():
    """Check that a ``parse.Iter`` source can be iterated repeatedly.

    The same source object is consumed three times in a row; on every pass
    the payloads of its ``"bytes"`` events must concatenate to the full input.
    """
    chunks = [b"hinz", b" & ", b"kunz"]
    expect = b"hinz & kunz"
    source = parse.Iter(chunks)
    for _ in range(3):
        payloads = [data for (evtype, data) in source if evtype == "bytes"]
        assert b"".join(payloads) == expect
def test_sgmlop_no_multiple_text_events():
    """Test that we don't get consecutive text events with sgmlop."""
    events = parse.events(
        parse.Iter(b"<a>gurk & hurz & hinz & kunz</a>"),
        parse.SGMLOP(),
    )

    # The complete event stream, in order; the element's text must arrive
    # as exactly one "text" event.
    expected_events = [
        ("url", url.URL("ITER")),
        ("enterstarttag", "a"),
        ("leavestarttag", "a"),
        ("text", "gurk & hurz & hinz & kunz"),
        ("endtag", "a"),
    ]
    for expected in expected_events:
        assert next(events) == expected

    # Nothing may follow the end tag.
    with pytest.raises(StopIteration):
        next(events)
def test_itertree_skip():
    """Check that ``itertree`` yields no ``li`` node once ``ul`` is not entered.

    The input is a ``ul`` element with ten ``li`` children. When the cursor
    is positioned on the ``ul`` node its ``entercontent`` flag is switched
    off, and the test asserts that no yielded node is an ``html.li``.
    """
    def xml():
        # Produce the document piecewise as UTF-8 encoded chunks.
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{i}</li>".encode("utf-8") for i in range(10))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(xml()),
        parse.Expat(ns=True),
        parse.Node(),
        enterelementnode=True,
        validate=True,
    )
    for cursor in cursors:
        if isinstance(cursor.node, html.ul):
            cursor.entercontent = False
        assert not isinstance(cursor.node, html.li)
def test_itertree_large():
    """Iterate over 1000 ``li`` elements selected from a generated document.

    Each selected node's string value must equal its position in the
    iteration (0, 1, 2, ...).
    """
    def xml():
        # Produce the document piecewise as UTF-8 encoded chunks.
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{i}</li>".encode("utf-8") for i in range(1000))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(xml()),
        parse.Expat(ns=True),
        parse.Node(),
        selector=html.li,
        validate=True,
    )
    for (index, cursor) in enumerate(cursors):
        assert int(str(cursor.node)) == index
        # Empty the content of the second-to-last path entry (presumably the
        # enclosing ``ul`` -- confirm) so handled items do not accumulate.
        cursor.path[-2].content.clear()
def test_parsingmethods():
    """Parse the same document through several pipelines and compare results.

    The document is an ``a`` element whose text and ``title`` attribute both
    contain non-ASCII characters (including one outside the BMP). It is fed
    to ``parse.tree`` as raw bytes, as ``str`` via ``parse.Encoder``, via
    ``parse.Iter``, via a ``parse.Stream`` with a one-byte buffer, and as a
    pre-parsed ElementTree via ``parse.ETree``. Every variant must produce
    the same text and attribute value.
    """
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Build the tree, pick the first ``a`` node and compare both its
        # text content and its ``title`` attribute with the expected string.
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    def expat_tail():
        # Fresh stage instances for every pipeline, as in the original
        # spelled-out calls.
        return (parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))

    check(b, *expat_tail())
    check(s, parse.Encoder(encoding="utf-8"), *expat_tail())
    check(parse.Iter(b), *expat_tail())
    # parse byte by byte
    check(parse.Stream(io.BytesIO(b), bufsize=1), *expat_tail())
    # NOTE(review): ``xml.etree.cElementTree`` was removed from the standard
    # library in Python 3.9 -- confirm where this module gets
    # ``cElementTree`` from.
    check(
        parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns),
        parse.Node(pool),
    )