def encode(ms, encoding='unicode', pretty_print=False):
    """
    Serialize the items of *ms* as one ``<mrs-list>`` XML document.

    Args:
        ms: iterable of objects; each one is converted with
            ``encode_mrs()`` and appended to the ``mrs-list`` root
        encoding: passed through unchanged to ``etree_tostring()``
        pretty_print: if true, insert a newline in front of every
            match of the tag pattern below (``<mrs`` followed by any
            character other than ``-``, ``</mrs>``, ``</mrs-list>``,
            ``<ep``/``<hcons`` followed by whitespace, ``<fvpair>``,
            and ``<extrapair>``; matched case-insensitively)
    Returns:
        the result of ``etree_tostring()``, with the extra newlines
        inserted when *pretty_print* is true
    """
    root = etree.Element('mrs-list')
    for item in ms:
        root.append(encode_mrs(item))

    serialized = etree_tostring(root, encoding=encoding)
    if not pretty_print:
        return serialized

    import re
    # Tags that should start on their own line; `[^-]` keeps the
    # pattern from matching the `<mrs-list` root tag itself.
    line_start_tags = re.compile(
        r'(<mrs[^-]|</mrs>|</mrs-list>'
        r'|<ep\s|<fvpair>|<extrapair>|<hcons\s)',
        re.IGNORECASE
    )
    return line_start_tags.sub(r'\n\1', serialized)
def serialize(ms, strict=False, encoding='unicode', pretty_print=False):
    """
    Serialize the items of *ms* as one ``<dmrs-list>`` XML document.

    Args:
        ms: iterable of objects; each one is converted with
            ``_encode_dmrs()`` and appended to the ``dmrs-list`` root
        strict: passed through to ``_encode_dmrs()``
        encoding: passed through unchanged to ``etree_tostring()``
        pretty_print: ``'LKB'``, ``'lkb'``, ``'Lkb'``, or ``True``
            selects LKB-style line breaks (a newline after each
            ``<dmrs ...>`` opening tag and after each ``</node>``,
            ``</link>``, and ``</dmrs>``); any other value disables
            pretty-printing
    Returns:
        the result of ``etree_tostring()``; in the pretty-printed case
        that result is first coerced with ``str()`` and then has the
        newlines inserted
    """
    root = etree.Element('dmrs-list')
    for item in ms:
        root.append(_encode_dmrs(item, strict=strict))

    # For now, pretty_print=True is the same as pretty_print='LKB'.
    # (Membership is tested with ==, so 1 is accepted like True.)
    use_lkb_layout = pretty_print in ('LKB', 'lkb', 'Lkb', True)
    if not use_lkb_layout:
        # TODO: per the original note, a native pretty_print option is
        # only available with lxml; tostringlist may be worth a look.
        return etree_tostring(root, encoding=encoding)

    line_end_tags = re.compile(r'(<dmrs[^>]+>|</node>|</link>|</dmrs>)')
    serialized = str(etree_tostring(root, encoding=encoding))
    return line_end_tags.sub(r'\1\n', serialized)
def test_etree_tostring():
    """
    Check ``etree_tostring(..., encoding='unicode')`` on an ``<a>``
    element with ASCII and non-ASCII text, then with a ``<b>`` child.
    """
    import xml.etree.ElementTree as etree

    root = etree.Element('a')

    # (text assigned to <a>, expected serialization); the non-ASCII
    # text is tried both with and without the u'' prefix.
    text_cases = [
        ('a', u'<a>a</a>'),
        (u'あ', u'<a>あ</a>'),
        ('あ', u'<a>あ</a>'),
    ]
    for text, expected in text_cases:
        root.text = text
        assert etree_tostring(root, encoding='unicode') == expected

    # A nested element with non-ASCII text must serialize as well.
    child = etree.SubElement(root, 'b')
    child.text = 'あ'
    assert etree_tostring(root, encoding='unicode') == u'<a>あ<b>あ</b></a>'