def test_tok_doctype():
    """A doctype declaration tokenizes to a single doctype token."""
    tokens = list(tok(io.StringIO('<!doctype>')))
    expected = [(doctype, '<!doctype>')]
    assert_equal(tokens, expected)
def test_tok_selfclosing():
    """A self-closing tag tokenizes to a single selfclosing token."""
    tokens = list(tok(io.StringIO('<foo/>')))
    expected = [(selfclosing, '<foo/>')]
    assert_equal(tokens, expected)
def test_tok_error_comment():
    """An unterminated comment is reported as an error token."""
    tokens = list(tok(io.StringIO('<!--foo>')))
    expected = [(error, '<!--foo>')]
    assert_equal(tokens, expected)
def test_tok_error_instruction():
    """An instruction missing its '?>' terminator is an error token."""
    tokens = list(tok(io.StringIO('<?instruction>')))
    expected = [(error, '<?instruction>')]
    assert_equal(tokens, expected)
def test_tok_opening():
    """An opening tag tokenizes to a single opening token."""
    tokens = list(tok(io.StringIO('<foo>')))
    expected = [(opening, '<foo>')]
    assert_equal(tokens, expected)
def test_tok_instruction():
    """A well-formed processing instruction tokenizes to one token."""
    tokens = list(tok(io.StringIO('<?foo?>')))
    expected = [(instruction, '<?foo?>')]
    assert_equal(tokens, expected)
def test_empty_text():
    """Empty input yields no tokens at all."""
    tokens = list(tok(io.StringIO('')))
    assert_equal(tokens, [])
def tree(xmlfile):
    """Parses XML file and prints a `tree` representation of it."""
    # Import lazily so the parser package is only loaded when needed.
    from sax.parser.core import pp, xml
    token_stream = lt.tok(xmlfile)
    print(pp(xml(token_stream)))
def test_text_opening_text_closing():
    """Mixed text/markup: text, opening, text, closing, in order."""
    source = 'eww<foo>bar</foo>'
    tokens = list(tok(io.StringIO(source)))
    expected = [
        (text, 'eww'),
        (opening, '<foo>'),
        (text, 'bar'),
        (closing, '</foo>'),
    ]
    assert_equal(tokens, expected)
def test_instruction_text_instruction():
    """Instructions surrounding text are each tokenized separately."""
    source = '<?xml version="1.0" encoding="UTF-8"?>text<?instruction?>'
    tokens = list(tok(io.StringIO(source)))
    expected = [
        (instruction, '<?xml version="1.0" encoding="UTF-8"?>'),
        (text, 'text'),
        (instruction, '<?instruction?>'),
    ]
    assert_equal(tokens, expected)
def test_instruction_too_short():
    """An instruction truncated before '>' is reported as an error."""
    source = '<?xml version="1.0" encoding="UTF-8"?'
    tokens = list(tok(io.StringIO(source)))
    expected = [(error, '<?xml version="1.0" encoding="UTF-8"?')]
    assert_equal(tokens, expected)
def test_selfclosing():
    """A self-closing tag produces exactly one selfclosing token."""
    source = '<foo/>'
    tokens = list(tok(io.StringIO(source)))
    expected = [(selfclosing, '<foo/>')]
    assert_equal(tokens, expected)
def test_opening_attrs():
    """Attributes stay inside the single opening-tag token."""
    source = '<foo a="a" b="b">'
    tokens = list(tok(io.StringIO(source)))
    expected = [(opening, '<foo a="a" b="b">')]
    assert_equal(tokens, expected)
def test_text_opening():
    """Leading text is emitted before the following opening tag."""
    source = 'text<foo>'
    tokens = list(tok(io.StringIO(source)))
    expected = [(text, 'text'), (opening, '<foo>')]
    assert_equal(tokens, expected)
def test_tok_text():
    """Bare character data tokenizes to a single text token."""
    tokens = list(tok(io.StringIO('foo')))
    expected = [(text, 'foo')]
    assert_equal(tokens, expected)
def test_opening_closing():
    """A matched open/close pair yields opening then closing tokens."""
    source = '<foo></foo>'
    tokens = list(tok(io.StringIO(source)))
    expected = [(opening, '<foo>'), (closing, '</foo>')]
    assert_equal(tokens, expected)
import os import click import sax.tokenizer.gen as gt import sax.tokenizer.loop as lt src = './samples/dbus-systemd1.xml' tsg = list(gt.tok(open(src))) tsl = list(lt.tok(open(src))) def k(n=32): for k, t in tsl[:n]: print(k, t) diffs = [(g, l) for g, l in zip(tsg, tsl) if g != l] ''' Intersting, parsers have different streams. Mostly out of sync because of the selfclosing `tokenistic sugar` trick, causing more tokens emitted by gen.tok: <sctag/> -> (opening, sctag), (closing, sctag) instead of <sctag/> -> (selfclosing, sctag) but may resolve the same trees when parsed.