def test_streamsource():
    """Check that ``parse.Stream`` reproduces the content of ``setup.py``.

    The payloads (``event[1]``) of all events whose type (``event[0]``) is
    ``"bytes"`` are concatenated and compared with the raw file content.
    """
    # Both file objects are closed deterministically instead of being left
    # to the garbage collector.
    with open("setup.py", "rb") as expected_file:
        expect = expected_file.read()

    # Stream objects are not reusable
    with open("setup.py", "rb") as stream_file:
        parsed = b"".join(
            event[1]
            for event in parse.Stream(stream_file, bufsize=32)
            if event[0] == "bytes"
        )

    assert parsed == expect
def printone(u):
    """Parse *u* with the HTML pipeline and print the resulting tree.

    *u* is wrapped in ``parse.URL`` when it is a ``url.URL`` instance and in
    ``parse.Stream`` otherwise. The parsed node is optionally normalized and
    compacted (when ``args.compact`` is true), passed through ``pretty()``
    and printed using the encoding of ``sys.stdout``.
    """
    if isinstance(u, url.URL):
        source = parse.URL(u)
    else:
        source = parse.Stream(u)

    # The pipeline objects are created in the same order as the arguments
    # of the original single call.
    pipeline = (
        source,
        parse.Tidy(),
        parse.NS(html),
        parse.Node(base="", pool=xsc.Pool(html, xml)),
    )
    tree = parse.tree(*pipeline)

    if args.compact:
        tree = tree.normalized().compacted()

    formatted = tree.pretty()
    print(formatted.string(encoding=sys.stdout.encoding))
def xsc2txt(instream, outstream, title, width):
    """Convert the markup read from *instream* to text on *outstream*.

    The input is parsed with the ``SGMLOP`` parser into ``doc`` namespace
    nodes, wrapped in ``html.html(html.body(doc.section(...)))``, converted
    via ``conv()`` and rendered by ``html.astext`` using *width*.

    *title* becomes the section title; ``None`` means no title
    (``xsc.Null`` is used instead).
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )

    heading = xsc.Null if title is None else doc.title(title)

    page = html.html(html.body(doc.section(heading, content)))
    converted = page.conv()
    outstream.write(html.astext(converted, width=width))
def test_parsingmethods():
    """Check that different parsing pipelines produce the same result.

    The same small document (an ``a`` element whose ``title`` attribute and
    text content both contain non-BMP and non-ASCII characters) is fed into
    ``parse.tree`` through several kinds of source: raw bytes, a string
    passed through ``parse.Encoder``, ``parse.Iter``, a one-byte-at-a-time
    ``parse.Stream`` and an ``ETree`` source.
    """
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Parse with validation, then verify content and attribute of the
        # first ``a`` node found in the tree.
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    check(b, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    check(
        s,
        parse.Encoder(encoding="utf-8"),
        parse.Expat(),
        parse.NS(a.xmlns),
        parse.Node(pool),
    )
    check(parse.Iter(b), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    # parse byte by byte
    check(
        parse.Stream(io.BytesIO(b), bufsize=1),
        parse.Expat(),
        parse.NS(a.xmlns),
        parse.Node(pool),
    )
    check(
        parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns),
        parse.Node(pool),
    )