def utf_8_encode(space, w_obj, errors="strict"):
    """Build the wrapped ``(bytes, length)`` result for UTF-8 encoding ``w_obj``.

    The UTF-8 data and its length are obtained from ``space.utf8_len_w``.
    If that data contains surrogates, it is first passed through
    ``rutf8.reencode_utf8_with_surrogates``.

    ``errors`` is accepted as part of the signature but is not consulted
    by this function.

    Returns the tuple created by ``space.newtuple`` holding the wrapped
    bytes and the wrapped length reported by ``space.utf8_len_w``.
    """
    encoded, length = space.utf8_len_w(w_obj)
    # When the reported length equals the byte count the data is plain
    # ASCII, so the surrogate scan is only worth doing otherwise.
    is_ascii = length == len(encoded)
    if not is_ascii and rutf8.has_surrogates(encoded):
        encoded = rutf8.reencode_utf8_with_surrogates(encoded)
    w_bytes = space.newbytes(encoded)
    w_length = space.newint(length)
    return space.newtuple([w_bytes, w_length])
def test_has_surrogate_xed_no_surrogate():
    """A leading 0xED byte alone must not be reported as a surrogate.

    U+D7B1 and U+D6CE sit below the surrogate range, yet their UTF-8
    encoding starts with the byte 0xED.
    """
    text = unichr(0xD7B1) + unichr(0xD6CE)
    encoded = text.encode("utf-8")
    # Sanity check: the input really does begin with 0xED.
    assert encoded.startswith(b"\xed")
    assert not rutf8.has_surrogates(encoded)
def test_has_surrogates(arg, surrogate):
    """Check ``rutf8.has_surrogates`` on text with and without a surrogate.

    ``arg`` is a text value and ``surrogate`` an integer code point; both
    are presumably supplied by a decorator or parametrization that is not
    visible here (TODO confirm). ``arg`` on its own is expected to be free
    of surrogates.
    """
    lone = unichr(surrogate)
    wrapped_bytes = (arg + lone + arg).encode("utf-8")
    plain_bytes = arg.encode("utf-8")
    lone_bytes = lone.encode("utf-8")
    # The surrounding text by itself must not trigger detection ...
    assert not rutf8.has_surrogates(plain_bytes)
    # ... while the surrogate must be found both alone and embedded.
    assert rutf8.has_surrogates(lone_bytes)
    assert rutf8.has_surrogates(wrapped_bytes)