Beispiel #1
0
def _normalize(bs: bistr, normalizer: icu.Normalizer2) -> bistr:
    """Apply *normalizer* to *bs* piece by piece through a ``BistrBuilder``.

    The modified text of *bs* is walked as an ``icu.UnicodeString``. Each
    leading span accepted by ``normalizer.spanQuickCheckYes`` is handed to
    ``builder.skip``; the chunk that follows, running up to the next position
    where ``normalizer.hasBoundaryBefore`` is true (or the end of the text),
    is normalized and handed to ``builder.replace``.

    ``icu.UnicodeString`` is indexed in UTF-16 code units, whereas the counts
    passed to the builder are code points, hence the ``countChar32`` calls.

    Args:
        bs: The string to normalize; its ``modified`` text is processed.
        normalizer: The ICU normalizer to apply.

    Returns:
        The result of ``builder.build()`` once the builder is complete.
    """
    builder = BistrBuilder(bs)
    us = icu.UnicodeString(bs.modified)
    while not builder.is_complete:
        # Pass over the prefix the normalizer's quick check accepts as-is.
        i = normalizer.spanQuickCheckYes(us)
        builder.skip(us.countChar32(0, i))
        if builder.is_complete:
            break
        us = us[i:]

        # Find the end of the chunk that needs normalizing: step over at
        # least one code point, then stop at the next normalization boundary.
        length = len(us)
        i = 0
        while i < length:
            # A lead surrogate means this code point spans two UTF-16 units.
            if us.charAt(i) & 0xFC00 == 0xD800:
                i += 1
            i += 1
            # Stop at the end of the text without indexing past it; the
            # boundary test is only meaningful for an in-range position.
            if i >= length:
                break
            if normalizer.hasBoundaryBefore(chr(us.char32At(i))):
                break

        chunk = us[:i]
        normalized = str(normalizer.normalize(chunk))
        builder.replace(chunk.countChar32(), normalized)
        us = us[i:]

    return builder.build()
Beispiel #2
0
def _normalize(normalizer: icu.Normalizer2, bs: bistr) -> bistr:
    """Apply *normalizer* to *bs* one segment at a time via a ``BistrBuilder``.

    Starting at the builder's current position, a segment is extended until
    ``normalizer.hasBoundaryBefore`` is true for the next element of
    ``builder.current`` or the end of that text is reached. The segment is
    normalized; if the result equals the segment it is passed over with
    ``builder.skip``, otherwise it is substituted with ``builder.replace``.

    Args:
        normalizer: The ICU normalizer to apply.
        bs: The string to normalize.

    Returns:
        The result of ``builder.build()`` once the builder is complete.
    """
    builder = BistrBuilder(bs)
    text = builder.current

    while not builder.is_complete:
        start = builder.position

        # Every segment holds at least one element; grow it until the next
        # element starts a new normalization boundary or the text runs out.
        end = start + 1
        while True:
            if end >= len(text):
                break
            if normalizer.hasBoundaryBefore(text[end]):
                break
            end += 1

        segment = text[start:end]
        normalized = normalizer.normalize(segment)
        unchanged = normalized == segment
        if not unchanged:
            builder.replace(len(segment), normalized)
        else:
            builder.skip(len(segment))

    return builder.build()