Ejemplo n.º 1
0
def encode(ms, encoding='unicode', pretty_print=False):
    """Serialize MRS objects into a single ``<mrs-list>`` XML document.

    Args:
        ms: iterable of objects; each is converted with ``encode_mrs`` and
            appended as a child of the ``<mrs-list>`` root element.
        encoding: passed unchanged to ``etree_tostring``.
        pretty_print: if true, a newline is inserted before each opening
            ``<mrs``, ``<ep``, ``<fvpair>``, ``<extrapair>`` and ``<hcons``
            tag and before the closing ``</mrs>`` and ``</mrs-list>`` tags.

    Returns:
        The serialized document exactly as produced by ``etree_tostring``
        (with the extra newlines when *pretty_print* is true).
        NOTE(review): presumably text for ``encoding='unicode'`` and bytes
        for any other encoding, mirroring
        ``xml.etree.ElementTree.tostring`` -- confirm against the helper.
    """
    e = etree.Element('mrs-list')
    for m in ms:
        e.append(encode_mrs(m))
    # Serialize once; both the plain and the pretty-printed result start
    # from the same string.
    xml = etree_tostring(e, encoding=encoding)
    if not pretty_print:
        return xml

    import re
    pattern = (r'(<mrs[^-]|</mrs>|</mrs-list>'
               r'|<ep\s|<fvpair>|<extrapair>|<hcons\s)')
    replacement = r'\n\1'
    if isinstance(xml, bytes):
        # A text pattern cannot be applied to a bytes object (TypeError on
        # Python 3), so use a bytes pattern instead.  The pattern is pure
        # ASCII, so this works for any ASCII-compatible output encoding.
        pattern = pattern.encode('ascii')
        replacement = replacement.encode('ascii')
    return re.sub(pattern, replacement, xml, flags=re.IGNORECASE)
Ejemplo n.º 2
0
def serialize(ms, strict=False, encoding='unicode', pretty_print=False):
    """Serialize DMRS objects into a single ``<dmrs-list>`` XML document.

    Args:
        ms: iterable of objects; each is converted with ``_encode_dmrs``
            and appended as a child of the ``<dmrs-list>`` root element.
        strict: passed unchanged to ``_encode_dmrs``.
        encoding: passed unchanged to ``etree_tostring``.
        pretty_print: ``True`` or one of ``'LKB'``, ``'lkb'``, ``'Lkb'``
            puts a newline after every opening ``<dmrs...>`` tag and after
            every ``</node>``, ``</link>`` and ``</dmrs>`` closing tag.
            Any other value leaves the output untouched.

    Returns:
        The serialized document as produced by ``etree_tostring`` (with the
        extra newlines when pretty-printing); it is no longer coerced with
        ``str()``, which mangled bytes into their ``"b'...'"`` repr.
        NOTE(review): presumably text for ``encoding='unicode'`` and bytes
        for any other encoding, mirroring
        ``xml.etree.ElementTree.tostring`` -- confirm against the helper.
    """
    e = etree.Element('dmrs-list')
    for m in ms:
        e.append(_encode_dmrs(m, strict=strict))
    xml = etree_tostring(e, encoding=encoding)
    # For now, pretty_print=True is the same as pretty_print='LKB'.
    if pretty_print in ('LKB', 'lkb', 'Lkb', True):
        pattern = r'(<dmrs[^>]+>|</node>|</link>|</dmrs>)'
        replacement = r'\1\n'
        if isinstance(xml, bytes):
            # Match the pattern type to the serialized type instead of
            # calling str() on the result.  The pattern is pure ASCII, so
            # this works for any ASCII-compatible output encoding.
            pattern = pattern.encode('ascii')
            replacement = replacement.encode('ascii')
        return re.sub(pattern, replacement, xml)
    # TODO: real indentation via pretty_print is lxml-only; look into
    # tostringlist as a possible alternative.
    return xml
Ejemplo n.º 3
0
def serialize(ms, strict=False, encoding='unicode', pretty_print=False):
    """Serialize DMRS objects into a single ``<dmrs-list>`` XML document.

    Args:
        ms: iterable of objects; each is converted with ``_encode_dmrs``
            and appended as a child of the ``<dmrs-list>`` root element.
        strict: passed unchanged to ``_encode_dmrs``.
        encoding: passed unchanged to ``etree_tostring``.
        pretty_print: ``True`` or one of ``'LKB'``, ``'lkb'``, ``'Lkb'``
            puts a newline after every opening ``<dmrs...>`` tag and after
            every ``</node>``, ``</link>`` and ``</dmrs>`` closing tag.
            Any other value leaves the output untouched.

    Returns:
        The serialized document as produced by ``etree_tostring`` (with the
        extra newlines when pretty-printing); it is no longer coerced with
        ``str()``, which mangled bytes into their ``"b'...'"`` repr.
        NOTE(review): presumably text for ``encoding='unicode'`` and bytes
        for any other encoding, mirroring
        ``xml.etree.ElementTree.tostring`` -- confirm against the helper.
    """
    e = etree.Element('dmrs-list')
    for m in ms:
        e.append(_encode_dmrs(m, strict=strict))
    xml = etree_tostring(e, encoding=encoding)
    # For now, pretty_print=True is the same as pretty_print='LKB'.
    if pretty_print in ('LKB', 'lkb', 'Lkb', True):
        pattern = r'(<dmrs[^>]+>|</node>|</link>|</dmrs>)'
        replacement = r'\1\n'
        if isinstance(xml, bytes):
            # Match the pattern type to the serialized type instead of
            # calling str() on the result.  The pattern is pure ASCII, so
            # this works for any ASCII-compatible output encoding.
            pattern = pattern.encode('ascii')
            replacement = replacement.encode('ascii')
        return re.sub(pattern, replacement, xml)
    # TODO: real indentation via pretty_print is lxml-only; look into
    # tostringlist as a possible alternative.
    return xml
Ejemplo n.º 4
0
def test_etree_tostring():
    """Check etree_tostring(..., encoding='unicode') on ASCII and non-ASCII text."""
    from xml.etree import ElementTree as etree

    def render(node):
        # Every assertion serializes with the same encoding argument.
        return etree_tostring(node, encoding='unicode')

    root = etree.Element('a')

    # Plain ASCII text.
    root.text = 'a'
    assert render(root) == u'<a>a</a>'

    # Non-ASCII text given as a u-prefixed literal...
    root.text = u'あ'
    assert render(root) == u'<a>あ</a>'

    # ...and as an unprefixed literal.
    root.text = 'あ'
    assert render(root) == u'<a>あ</a>'

    # Non-ASCII text in a nested element as well.
    child = etree.SubElement(root, 'b')
    child.text = 'あ'
    assert render(root) == u'<a>あ<b>あ</b></a>'