""" from __future__ import unicode_literals import sys import unicodedata from expand_cu import multi_replace # # output_charset = "ςεέρτυύθιίϊοόπαάσδφγηήξκλζχψωώβνμ" # number_pairs = zip(["α'", "β'", "γ'", "δ'", "ε'", "στ'", "ζ'", "θ'"], [str(i) for i in range(1, 10)]) convert_numbers = multi_replace(number_pairs) def simplify_el(string): result = [] for c in string: try: name = unicodedata.name(c).split() except ValueError: continue if 'WITH' in name: assert name[4] == 'WITH' # possible diacritics: TONOS OXIA DIALYTIKA VARIA DASIA # PERISPOMENI PROSGEGRAMMENI YPOGEGRAMMENI diacritics = [] if 'DIALYTIKA' in name[5:]: diacritics.append('DIALYTIKA')
("\\У", '\u2DF9'), ("\\jе", '\u2DFA'), ("\\ю", '\u2DFB'), ("\\Ю", '\u2DFB'), ("\\jа", '\u2DFC'), ("\\я", '\u2DFD'), ("\\Я", '\u2DFD'), ("\\u", '\u2DFE'), ("\\U", '\u2DFE'), ("\\ju", '\u2DFF'), ("\\и", '\uA675'), ("\\И", '\uA675'), ("\\^", '\u0487'), ("\\-", '\u0487'), ("#", '\u0482'), ("@", '\uA67E'), ("+", '\u2020'), ("*", '\uA673'), ("<*>", '\uA673'), ("<М\\р>", '\u1F545'), ("<+>", '\u1F542'), ("<\\+/>", '\u1F541'), ("<(+)>", '\u1F540'), ("<(:.>", '\u1F543'), ("<.:)>", '\u1F544'), ("<тьматем>", '\u0430\uA672'), ("<->", '\u2011'), ] hip2unicode = multi_replace(PAIRS, longest_first=True)