return _icu.character_name_from_code(code) or '' except (TypeError, ValueError, KeyError): return '' def normalize(text, mode='NFC'): # This is very slightly slower than using unicodedata.normalize, so stick with # that unless you have very good reasons not too. Also, it's speed # decreases on wide python builds, where conversion to/from ICU's string # representation is slower. return _icu.normalize(_nmodes[mode], unicode(text)) def contractions(col=None): global _cmap col = col or _collator if col is None: col = collator() ans = _cmap.get(collator, None) if ans is None: ans = col.contractions() ans = frozenset(filter(None, ans)) _cmap[col] = ans return ans ################################################################################ if __name__ == '__main__': from calibre.utils.icu_test import run run(verbosity=4)
from collections import OrderedDict items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse) ans = OrderedDict() last_c, last_ordnum = ' ', 0 for item in items: c = icu_upper(key(item) or ' ') ordnum, ordlen = collation_order(c) if last_ordnum != ordnum: if not is_narrow_build: ordlen = 1 last_c = c[0:ordlen] last_ordnum = ordnum try: ans[last_c].append(item) except KeyError: ans[last_c] = [item] return ans # Return the number of unicode codepoints in a string string_length = _icu.string_length if is_narrow_build else len # Return the number of UTF-16 codepoints in a string utf16_length = len if is_narrow_build else _icu.utf16_length ################################################################################ if __name__ == '__main__': from calibre.utils.icu_test import run run(verbosity=4)