def test_itersource():
    """Iterate the same ``parse.Iter`` source three times.

    Each pass joins the payload of every ``"bytes"`` event and must
    reproduce the concatenation of the original chunks.
    """
    chunks = [b"hinz", b" & ", b"kunz"]
    source = parse.Iter(chunks)
    expected = b"hinz & kunz"
    for _ in range(3):
        pieces = []
        for (evtype, data) in source:
            if evtype == "bytes":
                pieces.append(data)
        assert b"".join(pieces) == expected
def test_sgmlop_no_multiple_text_events():
    """Ensure sgmlop does not emit consecutive text events.

    The input contains several entity references inside one element; the
    event stream is expected to carry exactly one ``"text"`` event between
    the start tag and the end tag, and nothing after the end tag.
    """
    source = parse.Iter(b"<a>gurk &amp; hurz &amp; hinz &amp; kunz</a>")
    events = parse.events(source, parse.SGMLOP())
    expected_events = [
        ("url", url.URL("ITER")),
        ("enterstarttag", "a"),
        ("leavestarttag", "a"),
        ("text", "gurk & hurz & hinz & kunz"),
        ("endtag", "a"),
    ]
    for expected in expected_events:
        assert next(events) == expected
    # The stream must be exhausted once the end tag has been reported.
    with pytest.raises(StopIteration):
        next(events)
def test_itertree_skip():
    """Switch off ``entercontent`` at the ``ul`` element during iteration.

    After doing so, no ``li`` node may ever be produced by ``itertree``.
    """
    def xml():
        # Feed the document to the parser in small chunks: the opening
        # tag, ten list items, then the closing tag.
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{i}</li>".encode("utf-8") for i in range(10))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(xml()),
        parse.Expat(ns=True),
        parse.Node(),
        enterelementnode=True,
        validate=True,
    )
    for cursor in cursors:
        if isinstance(cursor.node, html.ul):
            cursor.entercontent = False
        assert not isinstance(cursor.node, html.li)
def test_itertree_large():
    """Iterate over 1000 ``li`` elements selected from a streamed document.

    The items must arrive in document order: the text of the n-th yielded
    node has to be the number n (counting from zero).
    """
    def xml():
        # Opening tag, one thousand numbered list items, closing tag.
        yield f"<ul xmlns='{html.xmlns}'>".encode("utf-8")
        yield from (f"<li>{i}</li>".encode("utf-8") for i in range(1000))
        yield "</ul>".encode("utf-8")

    cursors = parse.itertree(
        parse.Iter(xml()),
        parse.Expat(ns=True),
        parse.Node(),
        selector=html.li,
        validate=True,
    )
    expected = 0
    for cursor in cursors:
        assert int(str(cursor.node)) == expected
        # Empty the content of the second-to-last path entry (presumably
        # the enclosing ``ul``, so handled items don't pile up -- confirm).
        cursor.path[-2].content.clear()
        expected += 1
def test_parsingmethods():
    """Parse one document through several differently built pipelines.

    The document is an ``a`` element whose text and ``title`` attribute both
    hold the same string, which includes a character outside the Basic
    Multilingual Plane. Every pipeline must yield a tree in which the first
    ``a`` node has that string as its text and as its ``title`` attribute.
    """
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Build the tree from the given pipeline and compare the first
        # ``a`` node's text and ``title`` attribute with the input string.
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    # Bytes passed directly as the source.
    check(b, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))
    # Text source that is encoded to UTF-8 before it reaches the parser.
    check(s, parse.Encoder(encoding="utf-8"), parse.Expat(), parse.NS(a.xmlns),
          parse.Node(pool))
    check(parse.Iter(b), parse.Expat(), parse.NS(a.xmlns),
          parse.Node(pool))  # parse byte by byte
    # Stream source created with a buffer size of one.
    check(parse.Stream(io.BytesIO(b), bufsize=1), parse.Expat(),
          parse.NS(a.xmlns), parse.Node(pool))
    # Source built from an already parsed ElementTree element.
    check(parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns),
          parse.Node(pool))