Python Normalizer2.hasBoundaryBefore Examples

Programming Language: Python

Namespace/Package Name: icu

Class/Type: Normalizer2

Method/Function: hasBoundaryBefore

Examples at hotexamples.com: 3

Python Normalizer2.hasBoundaryBefore - 3 examples found. These are the top-rated real-world Python examples of icu.Normalizer2.hasBoundaryBefore, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getInstance(7)

hasBoundaryBefore(3)

getNFCInstance(2)

normalize(2)

getNFDInstance(1)

getNFKCCasefoldInstance(1)

getNFKCInstance(1)

getNFKDInstance(1)

spanQuickCheckYes(1)

Example #1

Show file

File: _icu.py Project: zxlzr/bistring

def _normalize(bs: bistr, normalizer: icu.Normalizer2) -> bistr:
    """Normalize ``bs`` with ``normalizer``, recording each change in a builder.

    The modified text of ``bs`` is processed as an ``icu.UnicodeString``.
    Each pass skips the prefix reported by ``spanQuickCheckYes`` and then
    normalizes the following chunk, which runs up to the next position at
    which ``hasBoundaryBefore`` is true (or to the end of the text).

    Args:
        bs: The string whose modified text is normalized.
        normalizer: The ICU normalizer to apply.

    Returns:
        The result of ``BistrBuilder.build()`` after all skips/replacements.
    """
    builder = BistrBuilder(bs)
    us = icu.UnicodeString(bs.modified)
    while not builder.is_complete:
        # Prefix that the normalizer's quick check accepts: skip it unchanged.
        # The builder is advanced in code points, hence countChar32.
        i = normalizer.spanQuickCheckYes(us)
        builder.skip(us.countChar32(0, i))
        if builder.is_complete:
            break
        us = us[i:]

        # Scan forward, one code point at a time, to the next boundary.
        i = 0
        length = len(us)
        while i < length:
            # A code unit in 0xD800-0xDBFF is a lead surrogate, so the code
            # point occupies two code units.
            if us.charAt(i) & 0xFC00 == 0xD800:
                i += 1
            i += 1
            # Stop at the end of the text without calling char32At() one
            # past the last code unit; otherwise stop at a boundary.
            if i >= length or normalizer.hasBoundaryBefore(chr(us.char32At(i))):
                break

        chunk = us[:i]
        normalized = str(normalizer.normalize(chunk))
        builder.replace(chunk.countChar32(), normalized)
        us = us[i:]

    return builder.build()

Example #2

Show file

File: generate_unicode.py Project: yazici/bistring

def gen_boundary_regex(normalizer: icu.Normalizer2) -> str:
    """Build a regex string covering code points with no boundary before them.

    Every code point in ``range(0x110000)`` for which
    ``normalizer.hasBoundaryBefore`` is false is collected, with consecutive
    code points merged into ranges. The ranges are emitted as a character
    class between the fixed prefix ``/.[`` and suffix ``]*/gsu``; each
    endpoint is formatted by ``escape`` (defined elsewhere in the file).

    Args:
        normalizer: The ICU normalizer whose boundaries are queried.

    Returns:
        The assembled regex text.
    """
    # Inclusive [first, last] pairs of code points lacking a boundary before.
    spans = []
    for code_point in range(0x110000):
        if normalizer.hasBoundaryBefore(chr(code_point)):
            continue
        if spans and spans[-1][1] + 1 == code_point:
            # Directly follows the previous span: extend it.
            spans[-1][1] = code_point
        else:
            spans.append([code_point, code_point])

    pieces = ['/.[']
    for first, last in spans:
        pieces.append(escape(first))
        if last > first:
            pieces.append('-')
            pieces.append(escape(last))
    pieces.append(']*/gsu')

    return "".join(pieces)

Example #3

Show file

File: _icu.py Project: yazici/bistring

def _normalize(normalizer: icu.Normalizer2, bs: bistr) -> bistr:
    """Normalize ``bs`` chunk by chunk, recording the edits in a builder.

    Starting at the builder's position, each chunk extends to the next index
    at which ``normalizer.hasBoundaryBefore`` is true (or to the end of the
    text). A chunk that normalization leaves unchanged is skipped; otherwise
    it is replaced by its normalized form.

    Args:
        normalizer: The ICU normalizer to apply.
        bs: The string to normalize.

    Returns:
        The result of ``BistrBuilder.build()`` once the builder is complete.
    """
    builder = BistrBuilder(bs)
    text = builder.current

    while not builder.is_complete:
        start = builder.position

        # A chunk always holds at least one element; grow it until the next
        # element has a normalization boundary before it.
        end = start + 1
        while end < len(text):
            if normalizer.hasBoundaryBefore(text[end]):
                break
            end += 1

        segment = text[start:end]
        width = len(segment)
        normalized = normalizer.normalize(segment)
        if normalized == segment:
            builder.skip(width)
        else:
            builder.replace(width, normalized)

    return builder.build()