コード例 #1
0
ファイル: simplify_el.py プロジェクト: mik01aj/corthus
"""

from __future__ import unicode_literals

import sys
import unicodedata

from expand_cu import multi_replace

#
# output_charset = "ςεέρτυύθιίϊοόπαάσδφγηήξκλζχψωώβνμ"
#

# Greek alphabetic numerals for the units 1-9 (the numeral sign is typed
# here as an apostrophe), paired positionally with their decimal digits.
# "η'" (8) has to be present: zip() pairs by position and stops at the
# shorter input, so omitting it maps "θ'" to "8" and silently drops "9".
# The pairs are materialized as a list so they remain usable after
# multi_replace has iterated over them (zip is a one-shot iterator on
# Python 3).
number_pairs = list(zip(
    ["α'", "β'", "γ'", "δ'", "ε'", "στ'", "ζ'", "η'", "θ'"],
    [str(i) for i in range(1, 10)]))
# Replacer built from the pairs above by the project's multi_replace helper.
convert_numbers = multi_replace(number_pairs)

def simplify_el(string):
    result = []
    for c in string:
        try:
            name = unicodedata.name(c).split()
        except ValueError:
            continue
        if 'WITH' in name:
            assert name[4] == 'WITH'
            # possible diacritics: TONOS OXIA DIALYTIKA VARIA DASIA
            #                      PERISPOMENI PROSGEGRAMMENI YPOGEGRAMMENI
            diacritics = []
            if 'DIALYTIKA' in name[5:]:
                diacritics.append('DIALYTIKA')
コード例 #2
0
ファイル: hip2unicode.py プロジェクト: mik01aj/corthus
    ("\\У",        '\u2DF9'),
    ("\\jе",       '\u2DFA'),
    ("\\ю",        '\u2DFB'),
    ("\\Ю",        '\u2DFB'),
    ("\\jа",       '\u2DFC'),
    ("\\я",        '\u2DFD'),
    ("\\Я",        '\u2DFD'),
    ("\\u",        '\u2DFE'),
    ("\\U",        '\u2DFE'),
    ("\\ju",       '\u2DFF'),
    ("\\и",        '\uA675'),
    ("\\И",        '\uA675'),
    ("\\^",        '\u0487'),
    ("\\-",        '\u0487'),
    ("#",          '\u0482'),
    ("@",          '\uA67E'),
    ("+",          '\u2020'),
    ("*",          '\uA673'),
    ("<*>",        '\uA673'),
    ("<М\\р>",     '\u1F545'),
    ("<+>",        '\u1F542'),
    ("<\\+/>",     '\u1F541'),
    ("<(+)>",      '\u1F540'),
    ("<(:.>",      '\u1F543'),
    ("<.:)>",      '\u1F544'),
    ("<тьматем>",  '\u0430\uA672'),
    ("<->",        '\u2011'),
]

# Build the module's converter by handing the PAIRS table to multi_replace.
# NOTE(review): PAIRS is presumably the list of (markup, replacement) tuples
# that ends just above, and longest_first=True presumably makes longer keys
# (e.g. "<*>") win over keys they contain (e.g. "*") -- confirm against
# multi_replace's definition.
hip2unicode = multi_replace(PAIRS, longest_first=True)