Python Utf8StringIterator Examples

Programming Language: Python

Namespace/Package Name: rpython.rlib.rutf8

Method/Function: Utf8StringIterator

Examples at hotexamples.com: 4

Python Utf8StringIterator - 4 examples found. These are the top-rated real-world Python examples of rpython.rlib.rutf8.Utf8StringIterator, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def UnknownEncodingHandler(self, space, name, info):
        """Populate ``info`` with a code point table for the encoding ``name``.

        ``self.all_chars`` is decoded with the codec called ``name`` using the
        "replace" error handler.  The decoded text must contain exactly 256
        code points, otherwise a ValueError is raised at application level.
        Each code point is stored, in order, into ``info.c_map``; the
        replacement character U+FFFD is stored as -1 instead.  The ``c_data``,
        ``c_convert`` and ``c_release`` fields of ``info`` are set to null
        pointers.  Always returns True on success.
        """
        # Only 8-bit encodings are supported: the decoded table has to map
        # one input byte to exactly one code point.
        # NOTE(review): presumably self.all_chars holds the 256 byte values
        # 0..255 -- confirm against the enclosing class.
        w_all_bytes = space.newbytes(self.all_chars)
        w_codec_name = space.newtext(name)
        w_error_mode = space.newtext("replace")
        w_decoded = space.call_method(w_all_bytes, "decode",
                                      w_codec_name, w_error_mode)
        table, codepoints = space.utf8_len_w(w_decoded)

        if codepoints != 256:
            raise oefmt(space.w_ValueError,
                        "multi-byte encodings are not supported")

        slot = 0
        for code in rutf8.Utf8StringIterator(table):
            # U+FFFD is what the "replace" handler produced for a byte the
            # codec could not decode; it is recorded as -1 in the map.
            if code == 0xfffd:
                entry = -1
            else:
                entry = code
            info.c_map[slot] = rffi.cast(rffi.INT, entry)
            slot += 1

        null_ptr = lltype.nullptr(rffi.VOIDP.TO)
        info.c_data = null_ptr
        info.c_convert = null_ptr
        info.c_release = null_ptr
        return True

Example #2

Show file

File: interp_encoder.py Project: zcxowwww/pypy

def raw_encode_basestring_ascii(space, w_string):
    """Escape ``w_string`` so that the result contains only printable ASCII.

    ``w_string`` is either a bytes object or a text object.  A bytes object
    made only of characters in the range ' '..'~' other than '"' and
    backslash is returned unchanged.  Otherwise the content is walked code
    point by code point and a new text object is returned in which:

    * '"' and backslash are preceded by a backslash;
    * code points below ' ' are replaced by their entry in
      ESCAPE_BEFORE_SPACE;
    * code points above '~' up to 0xffff become a backslash-u escape with
      four hex digits;
    * code points above 0xffff become two backslash-u escapes holding the
      high and low surrogate.

    Bytes input that needs escaping is validated with
    ``unicodehelper.check_utf8_or_raise`` first.  No surrounding quotes are
    added.
    """
    if space.isinstance_w(w_string, space.w_bytes):
        utf8 = space.bytes_w(w_string)
        for pos in range(len(utf8)):
            byte = utf8[pos]
            if byte < ' ' or byte > '~' or byte == '"' or byte == '\\':
                # first character that cannot be copied verbatim
                start = pos
                break
        else:
            # nothing to escape: plain, non-special ascii all the way
            return w_string

        unicodehelper.check_utf8_or_raise(space, utf8)
        out = StringBuilder(len(utf8))
        # everything before 'start' is safe ascii, copy it in one go
        out.append_slice(utf8, 0, start)
    else:
        # There used to be a check here for text made only of safe
        # characters, returning 'w_string' directly.  That costs an extra
        # pass over all characters, and the expected caller (json.encoder)
        # re-encodes a unicode result back to an ascii string anyway, which
        # is a second pass.  Building the string directly here needs only
        # one pass.
        utf8 = space.utf8_w(w_string)
        out = StringBuilder(len(utf8))
        start = 0

    codepoints = rutf8.Utf8StringIterator(utf8)
    # The skipped prefix is pure ascii, so 'start' bytes are also 'start'
    # code points.
    for _ in range(start):
        codepoints.next()

    for code in codepoints:
        if code < ord(' '):
            out.append(ESCAPE_BEFORE_SPACE[code])
            continue

        if code <= ord('~'):
            if code == ord('"') or code == ord('\\'):
                out.append('\\')
            out.append(chr(code))
            continue

        if code <= 0xffff:
            out.append('\\u')
            out.append(HEX[code >> 12])
            out.append(HEX[(code >> 8) & 0x0f])
            out.append(HEX[(code >> 4) & 0x0f])
            out.append(HEX[code & 0x0f])
            continue

        # Beyond the BMP: emit a surrogate pair.  Both surrogates have 'd'
        # as their top hex digit, so only the low three digits are looked up.
        offset = code - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        out.append('\\ud')
        out.append(HEX[(high >> 8) & 0x0f])
        out.append(HEX[(high >> 4) & 0x0f])
        out.append(HEX[high & 0x0f])
        out.append('\\ud')
        out.append(HEX[(low >> 8) & 0x0f])
        out.append(HEX[(low >> 4) & 0x0f])
        out.append(HEX[low & 0x0f])

    return space.newtext(out.build())

Example #3

Show file

def descr_upper(s):
    """Return ``s`` with every code point passed through unicodedb.toupper.

    ``s`` is iterated with rutf8.Utf8StringIterator and the converted code
    points are collected in a rutf8.Utf8StringBuilder sized from len(s).
    """
    upper = rutf8.Utf8StringBuilder(len(s))
    codepoints = rutf8.Utf8StringIterator(s)
    for code in codepoints:
        upper.append_code(unicodedb.toupper(code))
    return upper.build()

Example #4

Show file

def test_utf8_iterator(arg):
    """Check that iterating the utf-8 encoding of ``arg`` yields its characters.

    Every code point produced by Utf8StringIterator is turned back into a
    character with unichr; the resulting list must equal list(arg).
    """
    encoded = arg.encode('utf8')
    chars = [unichr(code) for code in rutf8.Utf8StringIterator(encoded)]
    assert list(arg) == chars