def test_attr_serialization_and_parsing(self):
    """Serialize a positioned node with attributes and parse it back.

    The test asserts that the position marker is absent from the default
    XML / S-expression output, present when an empty string is passed to
    the serializer, and restored as ``pos`` (not as an attribute) when the
    serialized form is parsed again.
    """
    node = Node('employee', 'James Bond').with_pos(46)
    node.attr['branch'] = 'Secret Service'
    node.attr['id'] = '007'

    # JSON: only checked for running through; the result is not inspected.
    json_text = node.as_json()
    parsed = parse_json_syntaxtree(json_text)

    # XML
    xml_text = node.as_xml()
    assert xml_text.find('_pos') < 0
    xml_text = node.as_xml('')
    assert xml_text.find('_pos') >= 0
    parsed = parse_xml(xml_text)
    assert parsed.pos == 46
    assert '_pos' not in parsed.attr
    # With ignore_pos=True the marker stays an ordinary attribute.
    parsed = parse_xml(xml_text, ignore_pos=True)
    assert '_pos' in parsed.attr
    assert parsed._pos < 0

    # S-expression
    sxpr_text = node.as_sxpr()
    assert sxpr_text.find('pos') < 0
    sxpr_text = node.as_sxpr('')
    assert sxpr_text.find('pos') >= 0
    parsed = parse_sxpr(sxpr_text)
    assert parsed.pos == 46
    assert 'pos' not in parsed.attr
def test_plaintext_handling(self):
    """Check how plain text between child elements is parsed and serialized."""
    # Text interleaved with an element shows up as ":Text" children.
    mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
    mixed_sxpr = flatten_sxpr(mixed.as_sxpr())
    assert mixed_sxpr == '(a (:Text "alpha ") (b "beta") (:Text " gamma"))'

    # Blank text on the same line as the tags.
    padded = parse_xml(' <a> <b>beta</b> </a> ')
    assert flatten_xml(padded.as_xml()) == (
        '<a><ANONYMOUS_Text__> </ANONYMOUS_Text__><b>beta</b>'
        '<ANONYMOUS_Text__> </ANONYMOUS_Text__></a>'
    )
    inline_xml = padded.as_xml(inline_tags={'a'}, string_tags={':Text'})
    assert inline_xml == '<a> <b>beta</b> </a>'

    # Blank text that contains line breaks.
    multiline = parse_xml(' <a>\n <b>beta</b>\n</a> ')
    assert multiline.as_xml(inline_tags={'a'}) == '<a><b>beta</b></a>'
def test_roundtrip(self):
    """An S-expression tree must survive XML -> parse_xml -> XML unchanged."""
    source_tree = parse_sxpr('(a (b c) (d (e f) (h i)))')
    flat = flatten_xml(source_tree.as_xml())
    assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

    # Parsing the flattened XML and serializing again must give the same text.
    reparsed = parse_xml(flat)
    assert flat == flatten_xml(reparsed.as_xml())
def test_as_etree(self):
    """Convert between nodes and ``xml.etree.ElementTree`` elements.

    Covers three cases: a plain tree with one attribute, mixed content
    with a comment and an empty element, and explicitly tracked empty tags.
    """
    import xml.etree.ElementTree as ET
    # alternative backend: import lxml.etree as ET

    # Plain tree with an attribute.
    source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
    expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
    element = parse_sxpr(source_sxpr).as_etree()
    serialized = ET.tostring(element, encoding="unicode")
    assert serialized == expected_xml, serialized
    assert Node.from_etree(element).as_sxpr() == source_sxpr

    # Mixed content, including a comment and an empty element.
    element = ET.XML(
        '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
    mixed = Node.from_etree(element)
    expected_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
    assert mixed.as_sxpr() == expected_sxpr
    # A further etree -> text -> etree -> node cycle must give the same result.
    reparsed = ET.XML(ET.tostring(mixed.as_etree(), encoding="unicode"))
    assert Node.from_etree(reparsed).as_sxpr() == expected_sxpr

    # Empty tags collected while parsing are handed on to as_etree().
    empty_tags = set()
    tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>', out_empty_tags=empty_tags)
    element = tree.as_etree(empty_tags=empty_tags)
    raw = ET.tostring(element).replace(b' /', b'/')
    assert raw == b'<a><b>1<c>2<d/>3</c></b>4</a>'
    restored = Node.from_etree(element)
    assert flatten_sxpr(restored.as_sxpr()) == (
        '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))')
def test_PI_and_DTD(self):
    """Processing instructions (<?...>), DTD-style declarations (<!...>) and
    comments around the document must not get in the way of parsing it."""
    document = """<!DOCTYPE nonsense> <?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?> <?xpacket begin="r" id="Arnold-Mueller2017a"?> <x:xmpmeta xmlns:x="adobe:ns:meta/"> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description xmlns:bibtex="http://jabref.sourceforge.net/bibteXMP/" bibtex:bibtexkey="Arnold-Mueller2017a" bibtex:journal="Informationspraxis" bibtex:title="Wie permanent sind Permalinks?" bibtex:type="Article" bibtex:doi="http://dx.doi.org/10.11588/ip.2016.2.33483" bibtex:year="2017" bibtex:volume="3" bibtex:issue="1" bibtex:url="http://www.eckhartarnold.de/papers/2016_Permalinks/Arnold_Mueller_2016_Permalinks.html"> <bibtex:author>Eckhart Arnold</bibtex:author> </rdf:Description> <!-- comment --> </rdf:RDF> </x:xmpmeta> <?xpacket end="r"?> <?xpacket end='r'?>"""
    root = parse_xml(document)

    # The root is the first real element, not the DOCTYPE or a PI.
    assert root.tag_name == 'x:xmpmeta'

    author_node = root.pick('bibtex:author')
    assert author_node and author_node.content == "Eckhart Arnold"

    description_node = root.pick('rdf:Description')
    assert description_node.has_attr('bibtex:title')
def test_collapse_children_if_plain(self):
    """Collapse every child of a one-line element into a single text node."""
    source = (
        "<EINZEILER><DEU_WORT>spectat</DEU_WORT><WS> </WS><DEU_WORT>ad</DEU_WORT>"
        "<WS> </WS><DEU_WORT>gravitatem</DEU_WORT><TEIL_SATZZEICHEN>,</TEIL_SATZZEICHEN>"
        "<WS> </WS><DEU_WORT>momentum</DEU_WORT></EINZEILER>"
    )
    root = parse_xml(source)
    # Sanity check: inline serialization reproduces the input verbatim.
    assert root.as_xml(inline_tags={'EINZEILER'}) == source

    # The condition accepts every child, so all of them are merged.
    collapse_children_if([root], lambda _context: True, self.Text)
    collapsed = root.as_xml(inline_tags={'EINZEILER'})
    assert collapsed == (
        "<EINZEILER><Text>spectat ad gravitatem, momentum</Text></EINZEILER>")
def test_collapse_children_if_structured(self):
    """Collapse children selectively: HOCHGESTELLT stays a separate element."""
    # NOTE(review): the fixture is reproduced on one line exactly as found;
    # if it originally spanned several lines, the line breaks between the
    # tags may need restoring -- verify against version control.
    source = """<Stelle> <DEU_WORT>p.</DEU_WORT> <SEITENZAHL>26</SEITENZAHL> <HOCHGESTELLT>b</HOCHGESTELLT> <TEIL_SATZZEICHEN>,</TEIL_SATZZEICHEN> <SEITENZAHL>18</SEITENZAHL> </Stelle>"""
    root = parse_xml(source)

    def is_collapsible(context):
        # Everything except the HOCHGESTELLT element may be merged.
        return context[-1].tag_name != 'HOCHGESTELLT'

    collapse_children_if([root], is_collapsible, self.Text)
    collapsed = root.as_xml(inline_tags={'Stelle'})
    assert collapsed == (
        "<Stelle><Text>p.26</Text><HOCHGESTELLT>b</HOCHGESTELLT><Text>,18</Text></Stelle>")
def profile_serializing():
    """Profile the serializers on the two sample documents in ``data/``.

    For each document the file is read, parsed with ``parse_xml`` and then
    ``as_xml``, ``as_sxpr(compact=True)``, ``as_json(indent=None)`` and
    ``json_dumps(to_json_obj())`` are each passed to ``cpu_profile`` with
    ``100`` as second argument.  A heading is printed before every run.
    """
    samples = (
        ('inferus', 'inferus.ausgabe.xml'),
        ('testdoc3', 'testdoc3.xml'),
    )
    for label, filename in samples:
        with open(os.path.join(scriptpath, 'data', filename)) as f:
            data = f.read()
        tree = parse_xml(data)

        # ``tree=tree`` binds the current tree to each lambda so that the
        # callables do not depend on the loop variable being rebound later.
        print('XML ' + label)
        cpu_profile(tree.as_xml, 100)
        print('S-Expression ' + label)
        cpu_profile(lambda tree=tree: tree.as_sxpr(compact=True), 100)
        print('json ' + label)
        cpu_profile(lambda tree=tree: tree.as_json(indent=None), 100)
        print('toolkit.json_dumps ' + label)
        cpu_profile(lambda tree=tree: json_dumps(tree.to_json_obj()), 100)
def test_compact_representation(self):
    """Check the compact S-expression output and the 'indented' serialization.

    The second half changes the global ``flatten_sxpr_threshold``
    configuration value.  The previous value is restored in a ``finally``
    clause so that a failing assertion cannot leak the modified setting
    into other tests.
    """
    tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
    compact = tree.as_sxpr(compact=True, flatten_threshold=0)
    assert compact == '(A\n (B\n (C "D")\n (E "F"))\n (G "H"))'

    # Content containing line breaks.
    tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    compact = tree.as_sxpr(compact=True, flatten_threshold=0)
    assert compact == ('(A\n (B\n (C\n "D"\n "X")\n (E "F"))'
                       '\n (G\n " H "\n " Y "))')

    tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
    c_node = tree['B']['C']
    c_node.attr['attr'] = 'val'

    threshold = get_config_value('flatten_sxpr_threshold')
    set_config_value('flatten_sxpr_threshold', 20)
    try:
        compact = tree.serialize('indented')
        assert compact == 'A\n B\n C `(attr "val") "D"\n E "F"\n G "H"', compact

        tree = parse_xml(
            '<note><priority level="high" /><remark></remark></note>')
        assert tree.serialize(
            how='indented') == 'note\n priority `(level "high")\n remark'
    finally:
        # Always put the global setting back, even when an assertion failed.
        set_config_value('flatten_sxpr_threshold', threshold)
def test_endlessloop_error(self):
    """Parsing an element whose content is two backslashes must finish and
    yield a truthy tree."""
    parsed = parse_xml(r'<LINEFEED>\\</LINEFEED>')
    assert parsed
def test_flatten_xml(self):
    """flatten_xml() must deliver the nested element on a single line
    without surrounding whitespace."""
    parsed = parse_xml('<alpha>\n <beta>gamma</beta>\n</alpha>')
    flattened = flatten_xml(parsed.as_xml())
    # The actual value serves as the assertion message for easier debugging.
    assert flattened == '<alpha><beta>gamma</beta></alpha>', flattened