def test_xml_cv():
    '''Parse the CV sample and check that the root exposes a child count.

    The sample file is opened in a ``with`` block so the handle is closed
    even when tokenizing or parsing raises.
    '''
    with open('./samples/cv.xml') as s:
        # Tokenize and parse while the file is still open.
        t = len(xml(tok(s)).children)
    e = None
    # NOTE(review): len() never returns None, so this effectively only
    # verifies that parsing completes and the root has `children`.
    assert_not_equal(t, e)
def test_xml():
    '''Parse the D-Bus policy sample and compare it to the expected tree.

    The expected tree is spelled out node by node (instruction, comments,
    doctype, nested tags and the whitespace text between them).  The sample
    file is opened in a ``with`` block so the handle is always closed.
    '''
    e = Root(children=[
        Instruction(instruction='<?xml version="1.0"?>'),
        Text(text=' '),
        Comment(comment='<!--*-nxml-*-->'),
        Text(text='\n'),
        Doctype(
            doctype='<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS '
                    'Bus Configuration 1.0//EN"\n "http://www.freedesktop.org/'
                    'standards/dbus/1.0/busconfig.dtd">'),
        Text(text='\n\n'),
        Comment(comment='<!--\n This file is part of systemd.\n\n systemd '
                        'is free software; you can redistribute it and/or modify '
                        'it\n under the terms of the GNU Lesser General Public '
                        'License as published by\n the Free Software Foundation; '
                        'either version 2.1 of the License, or\n (at your option) '
                        'any later version.\n-->'),
        Text(text='\n\n'),
        Tag(name='<busconfig>', attrs=[], children=[
            Text(text='\n\n '),
            Tag(name='<policy user="******">', attrs=[], children=[
                Text(text='\n '),
                Tag(name='<allow own="org.freedesktop.'
                         'timedate1"/>', attrs=[], children=[]),
                Text(text='\n '),
                Tag(name='<allow send_destination="org.'
                         'freedesktop.timedate1"/>', attrs=[], children=[]),
                Text(text='\n '),
                Tag(name='<allow receive_sender="org.'
                         'freedesktop.timedate1"/>', attrs=[], children=[]),
                Text(text='\n ')
            ]),
            Text(text='\n\n '),
            Tag(name='<policy context="default">', attrs=[], children=[
                Text(text='\n '),
                Tag(name='<allow send_destination="'
                         'org.freedesktop.timedate1"/>', attrs=[], children=[]),
                Text(text='\n '),
                Tag(name='<allow receive_sender="org.'
                         'freedesktop.timedate1"/>', attrs=[], children=[]),
                Text(text='\n ')
            ]),
            Text(text='\n\n')
        ]),
        Text(text='\n')
    ])
    # Parse and compare while the file is open; the context manager closes
    # the handle whether the assertion passes or fails.
    with open('./samples/dbus.xml') as s:
        t = xml(tok(s))
        assert_equal(t, e)
def test_1():
    '''Regression test ("bug"): a tag nested in a tag parses to nested Tags.'''
    expected = Root([
        Tag('<foo>', [], [
            Tag('<bar>', [], [Text('duh')]),
        ]),
    ])
    source = io.StringIO('<foo><bar>duh</bar></foo>')
    actual = xml(tok(source))
    assert_equal(actual, expected)
def test_malformed():
    '''Mis-nested closing tags must be rejected with MalformedXML.

    The input closes ``</foo>`` while ``<bar>`` is still open.  The test
    fails if parsing completes without raising, instead of silently passing.
    '''
    s = io.StringIO('<foo><bar>xxx</foo></bar>')
    try:
        xml(tok(s))
    except MalformedXML:
        # Expected outcome: the parser refused the mis-nested document.
        pass
    else:
        raise AssertionError(
            'MalformedXML was not raised for mis-nested closing tags')
def test_0():
    '''A single tag wrapping text parses to Root -> Tag -> Text.'''
    expected = Root([Tag('<foo>', [], [Text('x')])])
    source = io.StringIO('<foo>x</foo>')
    actual = xml(tok(source))
    assert_equal(actual, expected)
import os import click import sax.tokenizer.gen as gt import sax.tokenizer.loop as lt src = './samples/dbus-systemd1.xml' tsg = list(gt.tok(open(src))) tsl = list(lt.tok(open(src))) def k(n=32): for k, t in tsl[:n]: print(k, t) diffs = [(g, l) for g, l in zip(tsg, tsl) if g != l] ''' Interesting: the two tokenizers produce different streams. They are mostly out of sync because of the self-closing `tokenistic sugar` trick, which causes more tokens to be emitted by gen.tok: <sctag/> -> (opening, sctag), (closing, sctag) instead of <sctag/> -> (selfclosing, sctag) but they may resolve to the same trees when parsed.