Exemple #1
0
def utf_8_encode(space, w_obj, errors="strict"):
    """Encode ``w_obj`` as UTF-8 and return a wrapped ``(bytes, length)`` tuple.

    The UTF-8 data and its length come from ``space.utf8_len_w(w_obj)``.
    When the byte count equals that length the data is treated as ASCII
    and returned untouched; otherwise any surrogates detected by
    ``rutf8.has_surrogates`` are re-encoded first.

    ``errors`` is accepted but is not consulted by this implementation.
    """
    encoded, length = space.utf8_len_w(w_obj)
    # Same number of bytes as the reported length => ASCII, so the
    # surrogate scan is skipped entirely (short-circuit below).
    non_ascii = length != len(encoded)
    if non_ascii and rutf8.has_surrogates(encoded):
        encoded = rutf8.reencode_utf8_with_surrogates(encoded)
    w_bytes = space.newbytes(encoded)
    w_length = space.newint(length)
    return space.newtuple([w_bytes, w_length])
Exemple #2
0
def test_has_surrogate_xed_no_surrogate():
    """A UTF-8 string starting with 0xED but holding no surrogate is not flagged.

    Both code points lie below U+D800, yet the encoded data begins with
    the same lead byte (0xED) that encoded surrogates use.
    """
    text = unichr(0xD7B1) + unichr(0xD6CE)
    encoded = text.encode("utf-8")
    # Sanity check on the fixture itself: it really starts with 0xED.
    assert encoded.startswith(b"\xed")
    found = rutf8.has_surrogates(encoded)
    assert not found
Exemple #3
0
def test_has_surrogates(arg, surrogate):
    """Check ``rutf8.has_surrogates`` with and without an embedded surrogate.

    ``arg`` is text expected to contain no surrogate and ``surrogate`` is
    the integer code point of one (presumably supplied by a parametrizing
    decorator that is not visible here).
    """
    lone = unichr(surrogate)
    # Surrogate sandwiched between two copies of the clean text.
    wrapped_bytes = (arg + lone + arg).encode("utf-8")

    clean_bytes = arg.encode("utf-8")
    assert not rutf8.has_surrogates(clean_bytes)

    lone_bytes = lone.encode("utf-8")
    assert rutf8.has_surrogates(lone_bytes)

    assert rutf8.has_surrogates(wrapped_bytes)