def decode_object(space, w_obj, encoding, errors):
    """Decode w_obj with the given encoding and return the wrapped result.

    When encoding is None it is replaced by getdefaultencoding(space).
    For strict error handling (errors is None or 'strict') the 'ascii'
    and 'utf-8' encodings are decoded directly from the object's
    character buffer; every other combination is forwarded to the
    'decode' function of the builtin _codecs module.
    """
    if encoding is None:
        encoding = getdefaultencoding(space)

    strict = errors is None or errors == 'strict'

    if strict and encoding == 'ascii':
        # XXX error handling
        data = space.charbuf_w(w_obj)
        try:
            decoded = fast_str_decode_ascii(data)
        except ValueError:
            # The fast decoder only signals failure with ValueError; run
            # the full decoder again so that the error is reported
            # through the space's decode error handler.
            handler = unicodehelper.decode_error_handler(space)
            outcome = str_decode_ascii(
                data, len(data), None, final=True, errorhandler=handler)
            decoded = outcome[0]
        return space.wrap(decoded)

    if strict and encoding == 'utf-8':
        data = space.charbuf_w(w_obj)
        handler = unicodehelper.decode_error_handler(space)
        outcome = str_decode_utf_8(
            data, len(data), None, final=True, errorhandler=handler)
        return space.wrap(outcome[0])

    # Generic path: look up _codecs.decode and call it at app level.
    w_codecs = space.getbuiltinmodule("_codecs")
    w_decode = space.getattr(w_codecs, space.wrap("decode"))
    w_encoding = space.wrap(encoding)
    if errors is None:
        return space.call_function(w_decode, w_obj, w_encoding)
    return space.call_function(w_decode, w_obj, w_encoding,
                               space.wrap(errors))
def decode_object(space, w_obj, encoding, errors):
    """Decode w_obj with the given encoding and return the wrapped result.

    When encoding is None it is replaced by getdefaultencoding(space).
    For strict error handling (errors is None or 'strict') the 'ascii'
    and 'utf-8' encodings are decoded directly from the object's
    character buffer and wrapped with space.newunicode; every other
    combination is forwarded to the 'decode' function of the builtin
    _codecs module.
    """
    if encoding is None:
        encoding = getdefaultencoding(space)

    strict = errors is None or errors == 'strict'

    if strict and encoding == 'ascii':
        # XXX error handling
        data = space.charbuf_w(w_obj)
        try:
            decoded = fast_str_decode_ascii(data)
        except ValueError:
            # The fast decoder only signals failure with ValueError; run
            # the full decoder again so that the error is reported
            # through the space's decode error handler.
            handler = unicodehelper.decode_error_handler(space)
            outcome = str_decode_ascii(
                data, len(data), None, final=True, errorhandler=handler)
            decoded = outcome[0]
        return space.newunicode(decoded)

    if strict and encoding == 'utf-8':
        data = space.charbuf_w(w_obj)
        handler = unicodehelper.decode_error_handler(space)
        outcome = str_decode_utf_8(
            data, len(data), None, final=True, errorhandler=handler,
            allow_surrogates=True)
        return space.newunicode(outcome[0])

    # Generic path: look up _codecs.decode and call it at app level.
    w_codecs = space.getbuiltinmodule("_codecs")
    w_decode = space.getattr(w_codecs, space.newtext("decode"))
    w_encoding = space.newtext(encoding)
    if errors is None:
        return space.call_function(w_decode, w_obj, w_encoding)
    return space.call_function(w_decode, w_obj, w_encoding,
                               space.newtext(errors))
def decode_object(space, w_obj, encoding, errors):
    """Decode w_obj with the given encoding into a wrapped unicode object.

    When encoding is None it is replaced by getdefaultencoding(space).
    For strict error handling (errors is None or 'strict') the 'ascii'
    and 'utf-8' encodings are decoded directly from the object's
    character buffer.  Every other combination goes through
    interp_codecs.decode_text; if that result is not an instance of
    space.w_unicode, a TypeError is raised via oefmt.
    """
    if encoding is None:
        encoding = getdefaultencoding(space)

    strict = errors is None or errors == 'strict'

    if strict and encoding == 'ascii':
        data = space.charbuf_w(w_obj)
        try:
            decoded = fast_str_decode_ascii(data)
        except ValueError:
            # The fast decoder only signals failure with ValueError; run
            # the full decoder again so that the error is reported
            # through the space's decode error handler.
            handler = unicodehelper.decode_error_handler(space)
            outcome = str_decode_ascii(
                data, len(data), None, final=True, errorhandler=handler)
            decoded = outcome[0]
        return space.newunicode(decoded)

    if strict and encoding == 'utf-8':
        data = space.charbuf_w(w_obj)
        handler = unicodehelper.decode_error_handler(space)
        outcome = str_decode_utf_8(
            data, len(data), None, final=True, errorhandler=handler)
        return space.newunicode(outcome[0])

    # Generic path: delegate to the codecs machinery, then check that the
    # decoder produced a unicode object before handing it back.
    from pypy.module._codecs.interp_codecs import decode_text
    w_result = decode_text(space, w_obj, encoding, errors)
    if space.isinstance_w(w_result, space.w_unicode):
        return w_result
    raise oefmt(
        space.w_TypeError,
        "'%s' decoder returned '%T' instead of 'str'; "
        "use codecs.decode() to decode to arbitrary types",
        encoding, w_result)
def test_fast_str_decode_ascii(self):
    """Check fast_str_decode_ascii on pure-ASCII and non-ASCII input."""
    # Bytes up to 0x7F (including NUL) must decode to an exact unicode
    # object with the same code points.
    decoded = runicode.fast_str_decode_ascii("abc\x00\x7F")
    assert type(decoded) is unicode
    assert decoded == u"abc\x00\x7F"

    # A byte of 0x80 must be rejected with ValueError.
    py.test.raises(ValueError, runicode.fast_str_decode_ascii, "ab\x80")