Esempio n. 1
0
        return _icu.character_name_from_code(code) or ''
    except (TypeError, ValueError, KeyError):
        return ''

def normalize(text, mode='NFC'):
    """Normalize ``text`` through ICU and return the result.

    ``mode`` is a key of the module-level ``_nmodes`` table (default
    ``'NFC'``); a key missing from that table raises ``KeyError``. ``text``
    is passed through ``unicode()`` before being handed to ICU.

    Note from the original author: this is very slightly slower than
    ``unicodedata.normalize``, so prefer that unless you have very good
    reasons not to. Its speed also decreases on wide python builds, where
    conversion to/from ICU's string representation is slower.
    """
    icu_mode = _nmodes[mode]
    as_unicode = unicode(text)
    return _icu.normalize(icu_mode, as_unicode)

def contractions(col=None):
    """Return the non-empty contractions of a collator as a frozenset.

    ``col`` is the collator to query. When it is omitted (or falsy) the
    module-level ``_collator`` is used, and if that is ``None`` a collator
    is obtained by calling ``collator()``.

    The result of ``col.contractions()`` is stripped of falsy entries and
    cached in the module-level ``_cmap`` dict, keyed by the collator object,
    so each collator is queried only once.
    """
    col = col or _collator
    if col is None:
        col = collator()
    # The cache must be looked up with the same key it is stored under (the
    # resolved collator object); keying the lookup on anything else makes
    # every call a cache miss.
    ans = _cmap.get(col)
    if ans is None:
        ans = frozenset(entry for entry in col.contractions() if entry)
        _cmap[col] = ans
    return ans


################################################################################

if __name__ == '__main__':
    # Executed directly as a script: hand over to the test runner that
    # lives in calibre.utils.icu_test, asking for verbosity level 4.
    from calibre.utils import icu_test
    icu_test.run(verbosity=4)

Esempio n. 2
0
    from collections import OrderedDict
    items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
    ans = OrderedDict()
    last_c, last_ordnum = ' ', 0
    for item in items:
        c = icu_upper(key(item) or ' ')
        ordnum, ordlen = collation_order(c)
        if last_ordnum != ordnum:
            if not is_narrow_build:
                ordlen = 1
            last_c = c[0:ordlen]
            last_ordnum = ordnum
        try:
            ans[last_c].append(item)
        except KeyError:
            ans[last_c] = [item]
    return ans


# Length helpers, chosen once at import time according to is_narrow_build:
#   string_length -- return the number of unicode codepoints in a string
#   utf16_length  -- return the number of UTF-16 code units in a string
# On each kind of build one of the two is the builtin len() and the other is
# supplied by the _icu module.
if is_narrow_build:
    string_length = _icu.string_length
    utf16_length = len
else:
    string_length = len
    utf16_length = _icu.utf16_length

################################################################################

if __name__ == '__main__':
    # Executed directly as a script: hand over to the test runner that
    # lives in calibre.utils.icu_test, asking for verbosity level 4.
    from calibre.utils import icu_test
    icu_test.run(verbosity=4)