def decode_object(space, w_obj, encoding, errors):
    """Decode ``w_obj`` with the requested codec and return the result.

    ``encoding`` falls back to ``getdefaultencoding(space)`` when it is
    None.  When ``errors`` is None or 'strict' and the codec is 'ascii' or
    'utf-8', the bytes obtained through ``space.bufferstr_w(w_obj)`` are
    decoded directly with the matching ``str_decode_*`` helper and the
    first item of its result is wrapped.  Every other combination is
    forwarded to the ``decode`` function of the ``_codecs`` builtin module.
    """
    if encoding is None:
        encoding = getdefaultencoding(space)

    # The direct decoders are only used for the default/strict error mode.
    strict_mode = errors is None or errors == 'strict'

    if strict_mode and encoding == 'ascii':
        # XXX error handling
        raw = space.bufferstr_w(w_obj)
        handler = decode_error_handler(space)
        decoded = str_decode_ascii(raw, len(raw), None, final=True,
                                   errorhandler=handler)
        return space.wrap(decoded[0])

    if strict_mode and encoding == 'utf-8':
        raw = space.bufferstr_w(w_obj)
        handler = decode_error_handler(space)
        decoded = str_decode_utf_8(raw, len(raw), None, final=True,
                                   errorhandler=handler)
        return space.wrap(decoded[0])

    # Generic path: look up _codecs.decode and call it at application level.
    w_decode = space.getattr(space.getbuiltinmodule("_codecs"),
                             space.wrap("decode"))
    if errors is not None:
        return space.call_function(w_decode, w_obj, space.wrap(encoding),
                                   space.wrap(errors))
    # No explicit error mode: let the codec use its own default.
    return space.call_function(w_decode, w_obj, space.wrap(encoding))
def str_decode_utf8(rope):
    """Decode a rope of UTF-8 bytes into a rope of the decoded text.

    Returns the decoded rope, or None when the strict decoder rejects the
    data (``UnicodeDecodeError``) or a literal node ends in the middle of a
    sequence.

    The rope is decoded piecewise where possible so that its structure is
    kept; when the left half of a concatenation cannot be decoded on its
    own, the whole rope is flattened and decoded in one go instead.
    """
    from pypy.rlib.runicode import str_decode_utf_8
    if rope.is_ascii():
        # Nothing to convert: the rope itself is returned.
        return rope
    elif isinstance(rope, BinaryConcatNode):
        lresult = str_decode_utf8(rope.left)
        if lresult is not None:
            rresult = str_decode_utf8(rope.right)
            if rresult is None:
                # The left half decoded completely on its own, so the
                # failure lies entirely in the right half and the whole
                # rope is undecodable.  Report that instead of building a
                # concatenation node with a None child.
                return None
            return BinaryConcatNode(lresult, rresult)
        # Left half not decodable by itself (e.g. a sequence split across
        # the two halves): fall through to the flattened decode below.
    elif isinstance(rope, LiteralStringNode):
        try:
            # final=False so that a truncated trailing sequence shows up as
            # consumed < len(rope.s), which is checked just below.
            result, consumed = str_decode_utf_8(rope.s, len(rope.s), "strict",
                                                False)
        except UnicodeDecodeError:
            return None
        if consumed < len(rope.s):
            return None
        return rope_from_unicode(result)
    # Fallback: decode the flattened bytes as one complete string.
    s = rope.flatten_string()
    try:
        result, consumed = str_decode_utf_8(s, len(s), "strict", True)
    except UnicodeDecodeError:
        return None
    return rope_from_unicode(result)
def f(x):
    """Round-trip ``x`` copies of a multi-byte UTF-8 sample through runicode.

    The sample mixes two-, three- and four-byte sequences.  The result is
    the comparison between the original bytes and the bytes obtained by
    decoding and then re-encoding them.
    """
    sample = "\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"
    encoded = "".join([sample] * x)
    decoded, _consumed = runicode.str_decode_utf_8(encoded, len(encoded), True)
    reencoded = runicode.unicode_encode_utf_8(decoded, len(decoded), True)
    return encoded == reencoded