Esempio n. 1
0
        def f(x):
            """Round-trip a repeated UTF-8 sample through decode and encode.

            The sample is repeated ``x`` times, then decoded and re-encoded
            twice: once with ``allow_surrogates=True`` and once with
            ``allow_surrogates=False``.  Returns True when both re-encoded
            strings are equal to the input.
            """
            chunk = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"
            s1 = "".join([chunk] * x)
            size = len(s1)

            # Pass 1: surrogates permitted in both directions.
            lax_text, lax_used = runicode.str_decode_utf_8(
                s1, size, 'strict', allow_surrogates=True)
            s2 = runicode.unicode_encode_utf_8(
                lax_text, len(lax_text), 'strict', allow_surrogates=True)

            # Pass 2: surrogates rejected in both directions.
            tight_text, tight_used = runicode.str_decode_utf_8(
                s1, size, 'strict', allow_surrogates=False)
            s3 = runicode.unicode_encode_utf_8(
                tight_text, len(tight_text), 'strict', allow_surrogates=False)

            return s1 == s2 and s2 == s3
Esempio n. 2
0
        def f(x):
            """Verify that decode followed by encode reproduces the input.

            Builds ``x`` copies of a fixed UTF-8 byte sequence and runs it
            through str_decode_utf_8 / unicode_encode_utf_8 with
            ``allow_surrogates`` set to True and then to False.  The result
            is True only if both outputs match the original string.
            """
            pattern = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"
            s1 = "".join([pattern] * x)
            n_bytes = len(s1)

            with_surr, used_a = runicode.str_decode_utf_8(
                s1, n_bytes, 'strict', allow_surrogates=True)
            without_surr, used_b = runicode.str_decode_utf_8(
                s1, n_bytes, 'strict', allow_surrogates=False)

            s2 = runicode.unicode_encode_utf_8(
                with_surr, len(with_surr), 'strict', allow_surrogates=True)
            s3 = runicode.unicode_encode_utf_8(
                without_surr, len(without_surr), 'strict',
                allow_surrogates=False)
            return s1 == s2 and s2 == s3
Esempio n. 3
0
def hex_to_utf8(state, token, s):
    """Return the UTF-8 encoding of the code point written in hex in ``s``.

    ``s`` is parsed as a base-16 integer, turned into a character with
    UNICHR and encoded in 'strict' mode.  A ValueError or
    UnicodeDecodeError raised along the way is replaced by the exception
    produced by ``errorhandler(state, token, msg=...)``.
    """
    try:
        codepoint = int(s, 16)
        char = UNICHR(codepoint)
        encoded = unicode_encode_utf_8(char, len(char), 'strict')
    except (ValueError, UnicodeDecodeError):
        # XXX better error message
        raise errorhandler(state, token, msg="Error encoding %s" % s)
    return encoded
Esempio n. 4
0
def fsencode(space, w_uni):
    """Encode the wrapped unicode object ``w_uni`` for filesystem use.

    The encoder depends on the platform flags and on the codec state:

    * ``_WIN32``: unicode_encode_mbcs in 'strict' mode;
    * ``_MACOSX``: UTF-8 with the 'surrogateescape' error mode;
    * ``state.codec_need_encodings``: the locale codec with
      surrogateescape;
    * otherwise: ``w_uni.encode(<filesystem encoding>, 'surrogateescape')``
      is called at application level and its result returned as is.

    The first three branches return the encoded string wrapped with
    ``space.wrapbytes``.
    """
    state = space.fromcache(interp_codecs.CodecState)

    if _WIN32:
        text = space.unicode_w(w_uni)
        encoded = unicode_encode_mbcs(
            text, len(text), 'strict',
            errorhandler=encode_error_handler(space),
            force_replace=False)
        return space.wrapbytes(encoded)

    if _MACOSX:
        text = space.unicode_w(w_uni)
        encoded = runicode.unicode_encode_utf_8(
            text, len(text), 'surrogateescape',
            errorhandler=state.encode_error_handler)
        return space.wrapbytes(encoded)

    if state.codec_need_encodings:
        # bootstrap check: if the filesystem codec is implemented in
        # Python we cannot use it before the codecs are ready. use the
        # locale codec instead
        from pypy.module._codecs.locale import (
            unicode_encode_locale_surrogateescape)
        text = space.unicode_w(w_uni)
        encoded = unicode_encode_locale_surrogateescape(
            text, errorhandler=encode_error_handler(space))
        return space.wrapbytes(encoded)

    from pypy.module.sys.interp_encoding import getfilesystemencoding
    return space.call_method(w_uni, 'encode',
                             getfilesystemencoding(space),
                             space.wrap('surrogateescape'))
Esempio n. 5
0
 def encode_utf_escape(self, utf_escape):
     """Encode a hexadecimal Unicode escape as a list of UTF-8 characters.

     ``utf_escape`` is a sequence of hex-digit strings; they are joined and
     parsed as a base-16 code point.  Code points above U+10FFFF are
     reported through ``self.error``.  The return value is a list holding
     the individual characters of the UTF-8 encoding.
     """
     utf_codepoint = int("".join(utf_escape), 16)
     # The Unicode code space ends at U+10FFFF.  A bound of 0x101111 would
     # wrongly reject the valid code points U+101112..U+10FFFF.
     if utf_codepoint > 0x10FFFF:
         self.error("invalid Unicode codepoint (too large)")
     char = unichr(utf_codepoint)
     return [
         c for c in unicode_encode_utf_8(char, len(char), "ignore")
     ]
Esempio n. 6
0
 def test_encode_surrogate_pair_utf8(self):
     """Check how a UTF-16 surrogate pair is encoded to UTF-8.

     With ``allow_surrogates=True`` the pair U+D800 U+DC00 must always be
     merged into the four-byte sequence for U+10000.  With
     ``allow_surrogates=False`` the outcome depends on the build width.
     """
     pair = runicode.UNICHR(0xD800) + runicode.UNICHR(0xDC00)
     merged = '\xf0\x90\x80\x80'

     # Common to narrow and wide builds: merging when surrogates are allowed.
     assert runicode.unicode_encode_utf_8(
         pair, len(pair), True, allow_surrogates=True) == merged

     if runicode.MAXUNICODE < 65536:
         # Narrow unicode build, consider utf16 surrogate pairs
         assert runicode.unicode_encode_utf_8(
             pair, len(pair), True, allow_surrogates=False) == merged
     else:
         # Wide unicode build: surrogates not merged, encoding fails.
         py.test.raises(
             UnicodeEncodeError, runicode.unicode_encode_utf_8,
             pair, len(pair), True, allow_surrogates=False)
Esempio n. 7
0
def encode_object(space, w_object, encoding, errors):
    if encoding is None:
        # Get the encoder functions as a wrapped object.
        # This lookup is cached.
        w_encoder = space.sys.get_w_default_encoder()
    else:
        if errors is None or errors == 'strict':
            try:
                if encoding == 'ascii':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.rpy_encode_error_handler()
                    return space.wrap(unicode_encode_ascii(
                            u, len(u), None, errorhandler=eh))
                if encoding == 'utf-8':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.rpy_encode_error_handler()
                    return space.wrap(unicode_encode_utf_8(
                            u, len(u), None, errorhandler=eh,
                            allow_surrogates=True))
            except unicodehelper.RUnicodeEncodeError, ue:
                raise OperationError(space.w_UnicodeEncodeError,
                                     space.newtuple([
                    space.wrap(ue.encoding),
                    space.wrap(ue.object),
                    space.wrap(ue.start),
                    space.wrap(ue.end),
                    space.wrap(ue.reason)]))
        from pypy.module._codecs.interp_codecs import lookup_codec
        w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
Esempio n. 8
0
def encode_object(space, w_object, encoding, errors):
    """Encode ``w_object`` with ``encoding`` and return a wrapped bytes object.

    Fast paths: when ``errors`` is None or 'strict' and ``encoding`` is
    'ascii' or 'utf-8', the unicode value is encoded directly.  Otherwise
    the encoder (the default one when ``encoding`` is None, else item 0 of
    the looked-up codec) is called with the object and the error mode.

    Raises an application-level TypeError if the encoder's first result
    item is not an instance of ``space.w_bytes``.
    """
    if encoding is None:
        # Get the encoder functions as a wrapped object.
        # This lookup is cached.
        w_encoder = space.sys.get_w_default_encoder()
    else:
        strict = errors is None or errors == 'strict'
        if strict and encoding == 'ascii':
            text = space.unicode_w(w_object)
            handler = unicodehelper.encode_error_handler(space)
            encoded = unicode_encode_ascii(
                text, len(text), None, errorhandler=handler)
            return space.newbytes(encoded)
        if strict and encoding == 'utf-8':
            text = space.unicode_w(w_object)
            handler = unicodehelper.encode_error_handler(space)
            encoded = unicode_encode_utf_8(
                text, len(text), None, errorhandler=handler,
                allow_surrogates=True)
            return space.newbytes(encoded)
        from pypy.module._codecs.interp_codecs import lookup_codec
        w_codec = lookup_codec(space, encoding)
        w_encoder = space.getitem(w_codec, space.newint(0))

    # A missing error mode is passed to the encoder as 'strict'.
    mode = errors
    if mode is None:
        mode = 'strict'
    w_errors = space.newtext(mode)

    w_restuple = space.call_function(w_encoder, w_object, w_errors)
    w_retval = space.getitem(w_restuple, space.newint(0))
    if space.isinstance_w(w_retval, space.w_bytes):
        return w_retval
    raise oefmt(space.w_TypeError,
                "encoder did not return an string object (type '%T')",
                w_retval)
Esempio n. 9
0
 def handle_keypress(self, c_type, event):
     """Translate an SDL keyboard event into ``self.key``.

     Arrow keys map to fixed codes (28-31).  Otherwise, if the event carries
     a non-zero unicode value whose UTF-8 encoding is a single byte of at
     least 32, that byte value is used.  If no key was set and the key
     symbol is at most 255, the symbol itself is used.

     Raises KeyboardInterrupt when the resulting key and the current
     modifier mask match ``self.interrupt_key``.
     """
     self.key = 0
     key_event = rffi.cast(RSDL.KeyboardEventPtr, event)
     keysym = rffi.getintfield(key_event.c_keysym, 'c_sym')
     codepoint = rffi.getintfield(key_event.c_keysym, 'c_unicode')

     if keysym == RSDL.K_DOWN:
         self.key = 31
     elif keysym == RSDL.K_LEFT:
         self.key = 28
     elif keysym == RSDL.K_RIGHT:
         self.key = 29
     elif keysym == RSDL.K_UP:
         self.key = 30
     elif codepoint != 0:
         encoded = unicode_encode_utf_8(unichr(codepoint), 1, "ignore")
         # Only a single-byte encoding can be used as a key code.
         if len(encoded) == 1 and ord(encoded[0]) >= 32:
             self.key = ord(encoded[0])

     # Fall back to the raw key symbol when nothing above matched.
     if self.key == 0 and keysym <= 255:
         self.key = keysym

     combo = self.interrupt_key
     # Low byte is the key, the remaining high bits the modifier mask.
     if ((combo & 0xFF) == self.key
             and (combo >> 8) == self.get_modifier_mask(0)):
         raise KeyboardInterrupt
Esempio n. 10
0
def encode_object(space, w_object, encoding, errors):
    if encoding is None:
        # Get the encoder functions as a wrapped object.
        # This lookup is cached.
        w_encoder = space.sys.get_w_default_encoder()
    else:
        if errors is None or errors == 'strict':
            try:
                if encoding == 'ascii':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.raise_unicode_exception_encode
                    return space.wrap(
                        unicode_encode_ascii(u, len(u), None, errorhandler=eh))
                if encoding == 'utf-8':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.raise_unicode_exception_encode
                    return space.wrap(
                        unicode_encode_utf_8(u,
                                             len(u),
                                             None,
                                             errorhandler=eh,
                                             allow_surrogates=True))
            except unicodehelper.RUnicodeEncodeError, ue:
                raise OperationError(
                    space.w_UnicodeEncodeError,
                    space.newtuple([
                        space.wrap(ue.encoding),
                        space.wrap(ue.object),
                        space.wrap(ue.start),
                        space.wrap(ue.end),
                        space.wrap(ue.reason)
                    ]))
        from pypy.module._codecs.interp_codecs import lookup_codec
        w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
Esempio n. 11
0
 def test_encode_surrogate_pair_utf8(self):
     """Encoding U+D800 U+DC00 to UTF-8 on narrow and wide builds.

     The pair must merge into the encoding of U+10000 whenever
     ``allow_surrogates=True``.  With ``allow_surrogates=False`` a narrow
     build still merges it, while a wide build must raise
     UnicodeEncodeError.
     """
     high = runicode.UNICHR(0xD800)
     low = runicode.UNICHR(0xDC00)
     text = high + low
     size = len(text)
     expected = '\xf0\x90\x80\x80'

     # Identical expectation on both kinds of build.
     assert runicode.unicode_encode_utf_8(
         text, size, True, allow_surrogates=True) == expected

     if runicode.MAXUNICODE < 65536:
         # Narrow unicode build, consider utf16 surrogate pairs
         assert runicode.unicode_encode_utf_8(
             text, size, True, allow_surrogates=False) == expected
     else:
         # Wide unicode build: surrogates not merged, encoding fails.
         py.test.raises(
             UnicodeEncodeError, runicode.unicode_encode_utf_8,
             text, size, True, allow_surrogates=False)
Esempio n. 12
0
def encode_object(space, w_object, encoding, errors):
    """Encode ``w_object`` with ``encoding`` and return a wrapped string.

    'ascii' and 'utf-8' with a None or 'strict' error mode are encoded
    directly.  In every other case the encoder (the default one when
    ``encoding`` is None, else item 0 of the looked-up codec) is called
    with the object and the error mode.

    Raises an application-level TypeError if the encoder's first result
    item is not an instance of ``space.w_str``.
    """
    if encoding is None:
        # Get the encoder functions as a wrapped object.
        # This lookup is cached.
        w_encoder = space.sys.get_w_default_encoder()
    else:
        strict = errors is None or errors == 'strict'
        if strict and encoding == 'ascii':
            text = space.unicode_w(w_object)
            handler = unicodehelper.encode_error_handler(space)
            encoded = unicode_encode_ascii(
                text, len(text), None, errorhandler=handler)
            return space.wrap(encoded)
        if strict and encoding == 'utf-8':
            text = space.unicode_w(w_object)
            handler = unicodehelper.encode_error_handler(space)
            encoded = unicode_encode_utf_8(
                text, len(text), None, errorhandler=handler,
                allow_surrogates=True)
            return space.wrap(encoded)
        from pypy.module._codecs.interp_codecs import lookup_codec
        w_codec = lookup_codec(space, encoding)
        w_encoder = space.getitem(w_codec, space.wrap(0))

    # A missing error mode is passed to the encoder as 'strict'.
    mode = errors
    if mode is None:
        mode = 'strict'
    w_errors = space.wrap(mode)

    w_restuple = space.call_function(w_encoder, w_object, w_errors)
    w_retval = space.getitem(w_restuple, space.wrap(0))
    if space.isinstance_w(w_retval, space.w_str):
        return w_retval
    raise oefmt(space.w_TypeError,
                "encoder did not return an string object (type '%T')",
                w_retval)
Esempio n. 13
0
def hex_to_utf8(state, token, s):
    """Convert a hexadecimal code point string to its UTF-8 encoding.

    Parsing ``s`` with ``int(s, 16)``, building the character with UNICHR
    and encoding it all happen under one guard: a ValueError or
    UnicodeDecodeError is reported by raising
    ``errorhandler(state, token, msg=...)``.
    """
    try:
        number = int(s, 16)
        uchr = UNICHR(number)
        utf8 = unicode_encode_utf_8(uchr, len(uchr), 'strict')
    except (ValueError, UnicodeDecodeError):
        # XXX better error message
        raise errorhandler(state, token, msg="Error encoding %s" % s)
    return utf8
Esempio n. 14
0
def utf_8_encode(space, uni, errors="strict"):
    """Encode ``uni`` to UTF-8 and return a wrapped ``(result, length)`` tuple.

    ``errors`` of None is treated as 'strict'.  Errors are dispatched to
    the CodecState's ``encode_error_handler``; surrogates are allowed.
    The second tuple item is ``len(uni)``.
    """
    mode = errors
    if mode is None:
        mode = 'strict'
    state = space.fromcache(CodecState)
    length = len(uni)
    encoded = runicode.unicode_encode_utf_8(
        uni, length, mode, state.encode_error_handler,
        allow_surrogates=True)
    w_encoded = space.wrap(encoded)
    w_length = space.wrap(length)
    return space.newtuple([w_encoded, w_length])
Esempio n. 15
0
def encode_utf8(space, uni, allow_surrogates=False):
    """Encode ``uni`` to UTF-8 in 'strict' mode.

    Encoding errors are routed to the handler returned by
    ``encode_error_handler(space)``.  ``allow_surrogates`` is forwarded
    unchanged to the encoder and defaults to False.
    """
    # NOTE(review): presumably UnicodeEncodeError can only be avoided for
    # surrogates when allow_surrogates is True (Python 2 rules); with the
    # default of False the error handler may be invoked -- confirm.
    handler = encode_error_handler(space)
    size = len(uni)
    return runicode.unicode_encode_utf_8(
        uni, size, "strict",
        errorhandler=handler,
        allow_surrogates=allow_surrogates)
Esempio n. 16
0
 def f(n):
     """Return two UTF-8 encodings concatenated.

     The first part is ``x`` encoded with ``.encode('utf-8')``; the second
     is one of two unicode constants encoded with unicode_encode_utf_8.
     """
     x = u'àèì' + unichr(n)
     if not x:
         y = u'òìàà'
     else:
         y = u'ìòé'
     # the annotation of y is SomeUnicodeString(can_be_None=False)
     encoded_y = unicode_encode_utf_8(y, len(y), 'strict', errorhandler)
     return x.encode('utf-8') + encoded_y
Esempio n. 17
0
def encode_utf8(space, uni):
    """Encode ``uni`` to UTF-8 in 'strict' mode with surrogates allowed.

    ``raise_unicode_exception_encode`` is installed as the error handler.
    """
    # Surrogates are accepted whether paired or lone, so this is not
    # expected to raise UnicodeEncodeError.  A paired surrogate is treated
    # like the non-BMP character it stands for.  These are the Python2
    # rules; Python3 differs.
    size = len(uni)
    encoded = runicode.unicode_encode_utf_8(
        uni, size, "strict",
        errorhandler=raise_unicode_exception_encode,
        allow_surrogates=True)
    return encoded
Esempio n. 18
0
 def f(n):
     """Return two UTF-8 encodings concatenated.

     ``x`` is encoded with ``.encode("utf-8")``; the other operand is one of
     two unicode constants run through unicode_encode_utf_8.
     """
     x = u"àèì" + unichr(n)
     if not x:
         y = u"òìàà"
     else:
         y = u"ìòé"
     # the annotation of y is SomeUnicodeString(can_be_None=False)
     y_utf8 = unicode_encode_utf_8(y, len(y), "strict", errorhandler)
     return x.encode("utf-8") + y_utf8
Esempio n. 19
0
 def f(n):
     """Concatenate the UTF-8 form of ``x`` with that of a unicode constant.

     The constant is chosen by the truth value of ``x`` and encoded through
     unicode_encode_utf_8 with the ``errorhandler`` in scope.
     """
     x = u'àèì' + unichr(n)
     if not x:
         y = u'òìàà'
     else:
         y = u'ìòé'
     # the annotation of y is SomeUnicodeString(can_be_None=False)
     size = len(y)
     tail = unicode_encode_utf_8(y, size, 'strict', errorhandler)
     return x.encode('utf-8') + tail
Esempio n. 20
0
def utf_8_encode(space, uni, errors="strict"):
    """Return a wrapped tuple ``(utf8_bytes, len(uni))`` for ``uni``.

    A None ``errors`` falls back to 'strict'.  The error handler comes from
    the cached CodecState, and surrogates are allowed.
    """
    if errors is None:
        mode = 'strict'
    else:
        mode = errors
    state = space.fromcache(CodecState)
    n_chars = len(uni)
    encoded = runicode.unicode_encode_utf_8(
        uni, n_chars, mode, state.encode_error_handler,
        allow_surrogates=True)
    items = [space.wrap(encoded), space.wrap(n_chars)]
    return space.newtuple(items)
Esempio n. 21
0
 def write_raw_cached_string(self, si):
     """Write the unicode string ``si`` as a length-prefixed UTF-8 string.

     Without caching, the encoded length and then the bytes are written.
     With caching, a string seen for the first time is registered in
     ``self._string_cache`` and written the same way; a repeated string is
     written as the single integer ``MAX_STRING_SIZE + <cache index>``.
     """
     assert isinstance(si, unicode)

     if not self._with_cache:
         encoded = unicode_encode_utf_8(si, len(si), "?")
         assert len(encoded) <= MAX_INT32
         write_int_raw(len(encoded), self)
         self.write(encoded)
         return

     cache = self._string_cache
     slot = cache.get(si, -1)
     if slot != -1:
         # Already emitted once: refer back to it by index.
         write_int_raw(r_uint(MAX_STRING_SIZE + slot), self)
         return

     # First occurrence: register it under the next free index.
     slot = len(cache)
     cache[si] = slot
     encoded = unicode_encode_utf_8(si, len(si), "?")
     write_int_raw(len(encoded), self)
     assert len(encoded) <= MAX_STRING_SIZE
     self.write(encoded)
Esempio n. 22
0
def encode_utf8(space, uni):
    """Encode ``uni`` to UTF-8 in 'strict' mode with surrogates allowed.

    No explicit error handler is supplied (``errorhandler=None``).
    """
    # Surrogates are accepted whether paired or lone, so this is not
    # expected to raise UnicodeEncodeError.  A paired surrogate is treated
    # like the non-BMP character it stands for.  These are the Python2
    # rules; Python3 differs.
    size = len(uni)
    encoded = runicode.unicode_encode_utf_8(
        uni, size, "strict", errorhandler=None, allow_surrogates=True)
    return encoded
Esempio n. 23
0
 def write_raw_cached_string(self, si):
     """Emit the unicode string ``si`` as UTF-8, optionally through a cache.

     When ``self._with_cache`` is false the encoded length and bytes are
     written directly.  Otherwise a new string is added to
     ``self._string_cache`` and written in full, and a known string is
     written as ``MAX_STRING_SIZE`` plus its cache index.
     """
     assert isinstance(si, unicode)

     if not self._with_cache:
         utf8 = unicode_encode_utf_8(si, len(si), "?")
         assert len(utf8) <= MAX_INT32
         write_int_raw(len(utf8), self)
         self.write(utf8)
         return

     known = self._string_cache
     index = known.get(si, -1)
     if index == -1:
         # Unknown string: assign the next index, then write it out.
         index = len(known)
         known[si] = index
         utf8 = unicode_encode_utf_8(si, len(si), "?")
         write_int_raw(len(utf8), self)
         assert len(utf8) <= MAX_STRING_SIZE
         self.write(utf8)
         return

     # Known string: only the offset cache index is written.
     write_int_raw(r_uint(MAX_STRING_SIZE + index), self)
Esempio n. 24
0
 def identifier_w(self, space):
     identifier = self._utf8
     if identifier is not None:
         return identifier
     u = self._value
     eh = unicodehelper.rpy_encode_error_handler()
     try:
         identifier = unicode_encode_utf_8(u, len(u), None, errorhandler=eh)
     except unicodehelper.RUnicodeEncodeError, ue:
         raise wrap_encode_error(space, ue)
Esempio n. 25
0
 def identifier_w(self, space):
     identifier = self._utf8
     if identifier is not None:
         return identifier
     u = self._value
     eh = unicodehelper.rpy_encode_error_handler()
     try:
         identifier = unicode_encode_utf_8(u, len(u), None,
                                           errorhandler=eh)
     except unicodehelper.RUnicodeEncodeError, ue:
         raise wrap_encode_error(space, ue)
Esempio n. 26
0
def encode_utf8(space, uni, allow_surrogates=False):
    """Encode the unicode string ``uni`` to UTF-8 in 'strict' mode.

    Errors are handled by ``encode_error_handler(space)`` and
    ``allow_surrogates`` is forwarded to the encoder as given.
    """
    # Note that Python3 tends to forbid *all* surrogates in utf-8.
    # Passing allow_surrogates=True reverts to the Python 2 behavior,
    # which never raises UnicodeEncodeError: surrogates are then accepted
    # whether paired or lone, and a paired surrogate is considered like
    # the non-BMP character it stands for.  See also *_utf8sp().
    assert isinstance(uni, unicode)
    handler = encode_error_handler(space)
    size = len(uni)
    return runicode.unicode_encode_utf_8(
        uni, size, "strict",
        errorhandler=handler,
        allow_surrogates=allow_surrogates)
Esempio n. 27
0
def unicode_encode_utf8(rope, allow_surrogates=False):
    """Return a rope holding the UTF-8 encoding of ``rope``.

    * A rope for which ``is_ascii()`` is true is returned unchanged.
    * A BinaryConcatNode is encoded by encoding both children.
    * A LiteralUnicodeNode is encoded with unicode_encode_utf_8 in
      'strict' mode.
    * A LiteralStringNode goes through ``_str_encode_utf_8``.

    ``allow_surrogates`` is forwarded to the unicode encoder, including for
    nodes nested inside concatenations.  If no branch matches, the function
    falls off the end and returns None.
    """
    from rpython.rlib.runicode import unicode_encode_utf_8
    if rope.is_ascii():
        return rope
    elif isinstance(rope, BinaryConcatNode):
        # Pass allow_surrogates down explicitly; otherwise the children
        # would always be encoded with the default of False.
        return BinaryConcatNode(
            unicode_encode_utf8(rope.left, allow_surrogates),
            unicode_encode_utf8(rope.right, allow_surrogates))
    elif isinstance(rope, LiteralUnicodeNode):
        return LiteralStringNode(
            unicode_encode_utf_8(rope.u, len(rope.u), "strict",
                                 allow_surrogates=allow_surrogates))
    elif isinstance(rope, LiteralStringNode):
        return LiteralStringNode(_str_encode_utf_8(rope.s))
Esempio n. 28
0
def unicode_encode_utf8(rope, allow_surrogates=False):
    """Encode a rope to UTF-8, returning a new rope.

    ASCII ropes (``rope.is_ascii()``) are returned as is.  Concatenation
    nodes are rebuilt from their encoded children, unicode literal nodes
    are encoded with unicode_encode_utf_8 in 'strict' mode, and string
    literal nodes are passed through ``_str_encode_utf_8``.

    ``allow_surrogates`` applies to the whole rope: it is forwarded to the
    recursive calls as well as to the encoder.  A node of any other type
    yields None (implicit fall-through).
    """
    from rpython.rlib.runicode import unicode_encode_utf_8
    if rope.is_ascii():
        return rope
    elif isinstance(rope, BinaryConcatNode):
        # The recursion must carry allow_surrogates along; otherwise the
        # subtrees would be encoded with the default of False.
        left = unicode_encode_utf8(rope.left, allow_surrogates)
        right = unicode_encode_utf8(rope.right, allow_surrogates)
        return BinaryConcatNode(left, right)
    elif isinstance(rope, LiteralUnicodeNode):
        return LiteralStringNode(
            unicode_encode_utf_8(rope.u,
                                 len(rope.u),
                                 "strict",
                                 allow_surrogates=allow_surrogates))
    elif isinstance(rope, LiteralStringNode):
        return LiteralStringNode(_str_encode_utf_8(rope.s))
Esempio n. 29
0
 def handle_keypress(self, c_type, event):
     """Translate an SDL keyboard event into ``self.key``.

     Navigation, modifier and lock keys map to the matching
     ``key_constants`` value.  Otherwise, if the event carries a non-zero
     unicode value whose UTF-8 encoding is a single byte of at least 32,
     that byte value is used.  If no key was set and the key symbol is at
     most 255, the symbol itself is used.

     Raises KeyboardInterrupt when the resulting key and the current
     modifier mask match ``self.interrupt_key``.
     """
     self.key = 0
     key_event = rffi.cast(RSDL.KeyboardEventPtr, event)
     keysym = rffi.getintfield(key_event.c_keysym, 'c_sym')
     codepoint = rffi.getintfield(key_event.c_keysym, 'c_unicode')

     # Cursor and navigation keys.
     if keysym == RSDL.K_DOWN:
         self.key = key_constants.DOWN
     elif keysym == RSDL.K_LEFT:
         self.key = key_constants.LEFT
     elif keysym == RSDL.K_RIGHT:
         self.key = key_constants.RIGHT
     elif keysym == RSDL.K_UP:
         self.key = key_constants.UP
     elif keysym == RSDL.K_HOME:
         self.key = key_constants.HOME
     elif keysym == RSDL.K_END:
         self.key = key_constants.END
     elif keysym == RSDL.K_INSERT:
         self.key = key_constants.INSERT
     elif keysym == RSDL.K_PAGEUP:
         self.key = key_constants.PAGEUP
     elif keysym == RSDL.K_PAGEDOWN:
         self.key = key_constants.PAGEDOWN
     # Modifier keys: left and right variants share one code.
     elif keysym == RSDL.K_LSHIFT or keysym == RSDL.K_RSHIFT:
         self.key = key_constants.SHIFT
     elif keysym == RSDL.K_LCTRL or keysym == RSDL.K_RCTRL:
         self.key = key_constants.CTRL
     elif keysym == RSDL.K_LALT or keysym == RSDL.K_RALT:
         self.key = key_constants.COMMAND
     # Break and lock keys.
     elif keysym == RSDL.K_BREAK:
         self.key = key_constants.BREAK
     elif keysym == RSDL.K_CAPSLOCK:
         self.key = key_constants.CAPSLOCK
     elif keysym == RSDL.K_NUMLOCK:
         self.key = key_constants.NUMLOCK
     elif keysym == RSDL.K_SCROLLOCK:
         self.key = key_constants.SCROLLOCK
     elif codepoint != 0:
         encoded = unicode_encode_utf_8(unichr(codepoint), 1, "ignore")
         # Only a single-byte encoding can be used as a key code.
         if len(encoded) == 1 and ord(encoded[0]) >= 32:
             self.key = ord(encoded[0])

     # Fall back to the raw key symbol when nothing above matched.
     if self.key == 0 and keysym <= 255:
         self.key = keysym

     combo = self.interrupt_key
     # Low byte is the key, the remaining high bits the modifier mask.
     if ((combo & 0xFF) == self.key
             and (combo >> 8) == self.get_modifier_mask(0)):
         raise KeyboardInterrupt
Esempio n. 30
0
def surrogateescape_errors(space, w_exc):
    """Error callback implementing the 'surrogateescape' behaviour.

    For a UnicodeEncodeError, every code point in the failing range must
    lie in 0xdc80..0xdcff; each is turned into the byte ``code - 0xdc00``
    and the tuple ``(bytes, end)`` is returned.  Any other code point
    re-raises the original exception.

    For a UnicodeDecodeError, up to four consecutive non-ASCII bytes
    starting at ``start`` are mapped to the code points ``0xdc00 + byte``,
    and the tuple ``(text, start + consumed)`` is returned.  If the first
    byte is ASCII, the original exception is re-raised.

    Any other exception type raises an application-level TypeError.
    """
    check_exception(space, w_exc)

    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        w_text = space.getattr(w_exc, space.newtext('object'))
        w_text = space.convert_arg_to_w_unicode(w_text)
        start_index = space.int_w(
            space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        end_index = space.int_w(w_end)
        # Presumably converts character indices into byte offsets within
        # the UTF-8 storage -- verify against _index_to_byte.
        byte_pos = w_text._index_to_byte(start_index)
        byte_end = w_text._index_to_byte(end_index)
        utf8 = w_text._utf8
        escaped = ''
        while byte_pos < byte_end:
            code = rutf8.codepoint_at_pos(utf8, byte_pos)
            if not (0xdc80 <= code <= 0xdcff):
                # Not a UTF-8b surrogate, fail with original exception
                raise OperationError(space.type(w_exc), w_exc)
            escaped += chr(code - 0xdc00)
            byte_pos = rutf8.next_codepoint_pos(utf8, byte_pos)
        return space.newtuple([space.newbytes(escaped), w_end])

    if space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        end = space.int_w(space.getattr(w_exc, space.newtext('end')))
        data = space.bytes_w(space.getattr(w_exc, space.newtext('object')))
        limit = end - start
        replacement = u''
        count = 0
        # At most four bytes are escaped per call.
        while count < 4 and count < limit:
            byte = ord(data[start + count])
            if byte < 128:
                # Refuse to escape ASCII bytes.
                break
            replacement += unichr(0xdc00 + byte)
            count += 1
        if count == 0:
            # codec complained about ASCII byte.
            raise OperationError(space.type(w_exc), w_exc)
        replacement_utf8 = runicode.unicode_encode_utf_8(
            replacement, len(replacement), 'strict',
            allow_surrogates=True)
        w_replacement = space.newtext(replacement_utf8, len(replacement))
        w_resume = space.newint(start + count)
        return space.newtuple([w_replacement, w_resume])

    raise oefmt(space.w_TypeError,
                "don't know how to handle %T in error callback", w_exc)
Esempio n. 31
0
def encode_object(space, w_object, encoding, errors):
    if encoding is None:
        # Get the encoder functions as a wrapped object.
        # This lookup is cached.
        w_encoder = space.sys.get_w_default_encoder()
    else:
        if errors is None or errors == 'strict':
            try:
                if encoding == 'ascii':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.rpy_encode_error_handler()
                    return space.wrapbytes(
                        unicode_encode_ascii(u, len(u), None, errorhandler=eh))
                if encoding == 'utf-8':
                    u = space.unicode_w(w_object)
                    eh = unicodehelper.rpy_encode_error_handler()
                    return space.wrapbytes(
                        unicode_encode_utf_8(u, len(u), None, errorhandler=eh))
            except unicodehelper.RUnicodeEncodeError, ue:
                raise wrap_encode_error(space, ue)
        from pypy.module._codecs.interp_codecs import lookup_codec
        w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
Esempio n. 32
0
def encode_object(space, w_object, encoding, errors):
    """Encode ``w_object`` with ``encoding`` and return a wrapped bytes object.

    When ``errors`` is None or 'strict', UTF-8 (also chosen when
    ``encoding`` is None) and ASCII are encoded directly.  Everything else
    goes through ``encode_text``, with ``space.sys.defaultencoding``
    standing in for a None ``encoding``.

    Raises an application-level TypeError if ``encode_text`` returns
    something that is not an instance of ``space.w_bytes``.
    """
    strict = errors is None or errors == 'strict'

    if strict and (encoding is None or encoding == 'utf-8'):
        text = space.unicode_w(w_object)
        handler = unicodehelper.encode_error_handler(space)
        encoded = unicode_encode_utf_8(
            text, len(text), errors, errorhandler=handler)
        return space.newbytes(encoded)

    if strict and encoding == 'ascii':
        text = space.unicode_w(w_object)
        handler = unicodehelper.encode_error_handler(space)
        encoded = unicode_encode_ascii(
            text, len(text), errors, errorhandler=handler)
        return space.newbytes(encoded)

    from pypy.module._codecs.interp_codecs import encode_text
    if encoding is None:
        encoding = space.sys.defaultencoding
    w_retval = encode_text(space, w_object, encoding, errors)
    if space.isinstance_w(w_retval, space.w_bytes):
        return w_retval
    raise oefmt(
        space.w_TypeError,
        "'%s' encoder returned '%T' instead of 'bytes'; "
        "use codecs.encode() to encode to arbitrary types", encoding,
        w_retval)
Esempio n. 33
0
 def handle_keypress(self, c_type, event):
     """Store the key code for an SDL keyboard event in ``self.key``.

     The four arrow keys get the fixed codes 28-31.  A non-zero unicode
     value is used when its UTF-8 encoding is one byte of value 32 or
     more.  If no key was set, a key symbol of at most 255 is used
     directly.

     Raises KeyboardInterrupt if the key and the current modifier mask
     equal the two parts of ``self.interrupt_key``.
     """
     self.key = 0
     sdl_event = rffi.cast(RSDL.KeyboardEventPtr, event)
     symbol = rffi.getintfield(sdl_event.c_keysym, 'c_sym')
     unicode_value = rffi.getintfield(sdl_event.c_keysym, 'c_unicode')

     if symbol == RSDL.K_DOWN:
         self.key = 31
     elif symbol == RSDL.K_LEFT:
         self.key = 28
     elif symbol == RSDL.K_RIGHT:
         self.key = 29
     elif symbol == RSDL.K_UP:
         self.key = 30
     elif unicode_value != 0:
         utf8 = unicode_encode_utf_8(unichr(unicode_value), 1, "ignore")
         # Multi-byte encodings cannot be used as a key code.
         if len(utf8) == 1 and ord(utf8[0]) >= 32:
             self.key = ord(utf8[0])

     if self.key == 0 and symbol <= 255:
         self.key = symbol

     interrupt = self.interrupt_key
     wanted_key = interrupt & 0xFF
     # The modifier mask is only queried once the key itself matches.
     if (wanted_key == self.key
             and (interrupt >> 8) == self.get_modifier_mask(0)):
         raise KeyboardInterrupt
Esempio n. 34
0
 def _create_dict(self, dct):
     """Build a JsonObject from ``dct`` with its keys encoded to UTF-8.

     Keys are encoded in 'strict' mode; values are carried over untouched.
     """
     encoded = {}
     for name, item in dct.iteritems():
         utf8_name = unicode_encode_utf_8(name, len(name), "strict")
         encoded[utf8_name] = item
     return JsonObject(encoded)
Esempio n. 35
0
 def as_bytes(self):
     """Return ``self.unistr`` encoded to UTF-8 in 'strict' mode.

     The result is passed through ``rstring.assert_str0`` before being
     returned.
     """
     from rpython.rlib.runicode import unicode_encode_utf_8
     text = self.unistr
     encoded = unicode_encode_utf_8(text, len(text), "strict")
     return rstring.assert_str0(encoded)
Esempio n. 36
0
 def tostring(self):
     """Return ``#\\`` followed by the UTF-8 encoding of ``self.value``."""
     text = self.value
     encoded = runicode.unicode_encode_utf_8(text, len(text), "strict")
     return "#\\%s" % encoded
Esempio n. 37
0
 def as_bytes(self):
     """Encode ``self.unistr`` to UTF-8 ('strict') and return the result.

     The encoded string goes through ``rstring.assert_str0`` on the way out.
     """
     from rpython.rlib.runicode import unicode_encode_utf_8
     unistr = self.unistr
     size = len(unistr)
     return rstring.assert_str0(
         unicode_encode_utf_8(unistr, size, "strict"))
Esempio n. 38
0
def unicode_to_utf8(s):
    """Encode the `unicode` value ``s`` as a UTF-8 `str` in 'strict' mode."""
    size = len(s)
    encoded = unicode_encode_utf_8(s, size, 'strict')
    return encoded
Esempio n. 39
0
def write_char(w_char, w_port, env, cont):
    """Print the UTF-8 encoding of ``w_char.value`` to ``w_port``.

    The encoded string is handed to ``do_print`` together with ``w_port``,
    ``env`` and ``cont``; whatever ``do_print`` returns is returned.
    """
    char = w_char.value
    from rpython.rlib.runicode import unicode_encode_utf_8
    encoded = unicode_encode_utf_8(char, len(char), "strict")
    return do_print(encoded, w_port, env, cont)
Esempio n. 40
0
def write_char(w_char, w_port, env, cont):
    """Encode ``w_char.value`` to UTF-8 and pass it to ``do_print``.

    Returns the result of ``do_print(encoded, w_port, env, cont)``.
    """
    value = w_char.value
    from rpython.rlib.runicode import unicode_encode_utf_8
    size = len(value)
    utf8 = unicode_encode_utf_8(value, size, "strict")
    return do_print(utf8, w_port, env, cont)
Esempio n. 41
0
 def tostring(self):
     """Format ``self.value`` as ``#\\`` plus its UTF-8 encoding."""
     value = self.value
     utf8 = runicode.unicode_encode_utf_8(value, len(value), "strict")
     return "#\\%s" % utf8
Esempio n. 42
0
        def f(x):
            """Decode and re-encode a repeated UTF-8 sample.

            Returns True when the re-encoded string equals the input built
            from ``x`` copies of the sample.
            """
            chunk = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"
            s1 = "".join([chunk] * x)
            size = len(s1)
            decoded, used = runicode.str_decode_utf_8(s1, size, True)
            s2 = runicode.unicode_encode_utf_8(decoded, len(decoded), True)
            return s1 == s2
Esempio n. 43
0
 def encode_utf_escape(self, utf_escape):
     """Turn a hexadecimal Unicode escape into a list of UTF-8 characters.

     The hex-digit strings in ``utf_escape`` are joined and parsed as a
     base-16 code point.  A code point above U+10FFFF is reported through
     ``self.error``.  Returns the characters of the UTF-8 encoding as a
     list.
     """
     utf_codepoint = int("".join(utf_escape), 16)
     # U+10FFFF is the last valid Unicode code point.  A limit of 0x101111
     # would wrongly reject U+101112..U+10FFFF.
     if utf_codepoint > 0x10FFFF:
         self.error("invalid Unicode codepoint (too large)")
     char = unichr(utf_codepoint)
     return [c for c in unicode_encode_utf_8(char, len(char), "ignore")]
Esempio n. 44
0
def hex_to_utf8(s):
    """Return the UTF-8 encoding of the code point given in hex by ``s``.

    ``s`` is parsed with ``int(s, 16)``, converted with UNICHR and encoded
    in 'strict' mode; no exception is caught here.
    """
    codepoint = int(s, 16)
    char = UNICHR(codepoint)
    size = len(char)
    return unicode_encode_utf_8(char, size, 'strict')
Esempio n. 45
0
 def as_bytes(self):
     """Return ``self.unistr`` encoded to UTF-8 in 'strict' mode."""
     from rpython.rlib.runicode import unicode_encode_utf_8
     text = self.unistr
     encoded = unicode_encode_utf_8(text, len(text), "strict")
     return encoded
Esempio n. 46
0
def encode_utf8(space, uni):
    """Encode ``uni`` to UTF-8 in 'strict' mode with surrogates allowed.

    Encoding errors are passed to the handler returned by
    ``encode_error_handler(space)``.
    """
    handler = encode_error_handler(space)
    size = len(uni)
    encoded = runicode.unicode_encode_utf_8(
        uni, size, "strict",
        errorhandler=handler,
        allow_surrogates=True)
    return encoded
Esempio n. 47
0
def unicode_to_utf8(s):
    """Return the UTF-8 encoded `str` for the `unicode` value ``s``.

    Encoding uses the 'strict' error mode.
    """
    utf8 = unicode_encode_utf_8(s, len(s), 'strict')
    return utf8
Esempio n. 48
0
 def wrapunicode(self, x):
     """Wrap the unicode string ``x`` as a JsonString of its UTF-8 encoding."""
     encoded = unicode_encode_utf_8(x, len(x), "strict")
     return JsonString(encoded)
Esempio n. 49
0
def encode_unicode_utf8(string):
    """Return ``string`` encoded to UTF-8, passing None as the error mode."""
    size = len(string)
    return runicode.unicode_encode_utf_8(string, size, None)
Esempio n. 50
0
        def f(x):
            """Check a UTF-8 decode/encode round trip on a repeated sample.

            The input is ``x`` copies of a fixed byte sequence; the result
            is True when decoding then encoding yields the same string.
            """
            pattern = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"
            s1 = "".join([pattern] * x)
            text, n_used = runicode.str_decode_utf_8(s1, len(s1), True)
            n_chars = len(text)
            s2 = runicode.unicode_encode_utf_8(text, n_chars, True)
            return s1 == s2
Esempio n. 51
0
 def as_bytes(self):
     """Encode ``self.unistr`` to UTF-8 ('strict') and return the bytes."""
     from rpython.rlib.runicode import unicode_encode_utf_8
     unistr = self.unistr
     size = len(unistr)
     return unicode_encode_utf_8(unistr, size, "strict")