Example #1
0
def unicode_escape_decode(space, w_string, errors="strict", w_final=None):
    """Decode a unicode-escape byte string and return a wrapped 2-tuple.

    The raw bytes are extracted from ``w_string`` with the ``'s*'``
    argument format and handed to
    ``unicodehelper.str_decode_unicode_escape``.  ``errors`` falls back to
    ``'strict'`` when it is None, and ``w_final`` is evaluated for
    truthiness through the object space.

    The result is ``(text, lgt)``: ``text`` is built with
    ``space.newutf8`` from the decoder's first two results, and ``lgt`` is
    the decoder's third result wrapped as an int (presumably the number of
    input bytes consumed -- confirm against the helper).

    When the decoder reports an offending escape character, a
    DeprecationWarning describing it is issued via ``space.warn`` before
    the tuple is returned.
    """
    if errors is None:
        errors = 'strict'

    raw = space.getarg_w('s*', w_string).as_str()
    is_final = space.is_true(w_final)
    codec_state = space.fromcache(CodecState)
    name_handler = codec_state.get_unicodedata_handler(space)

    utf8, u_len, lgt, bad_char = unicodehelper.str_decode_unicode_escape(
        raw, errors, is_final, codec_state.decode_error_handler,
        name_handler)

    if bad_char is not None:
        # 'bad_char' is a single byte-string character and may be
        # >= '\x80'.  Pasting such a byte directly into the message would
        # produce an invalid utf-8 string, so anything outside printable
        # ASCII is rendered through the repr of a bytes object instead.
        if bad_char < ' ' or bad_char >= '\x7f':
            w_repr = space.repr(space.newbytes(bad_char))
            msg = "invalid escape sequence: '\\' followed by %s" % (
                space.text_w(w_repr), )
        else:
            msg = "invalid escape sequence '\\%s'" % (bad_char, )
        space.warn(space.newtext(msg), space.w_DeprecationWarning)

    w_text = space.newutf8(utf8, u_len)
    w_lgt = space.newint(lgt)
    return space.newtuple([w_text, w_lgt])
Example #2
0
def test_unicode_escape_incremental_bug(space):
    """Check unicode-escape decoding when the input is split in two.

    For every split point strictly inside ``data``, the prefix is decoded
    as a non-final chunk, then everything after the position reported by
    the first call is decoded as the final chunk.  The two reported
    positions must add up to the full input length, and the concatenated
    outputs must decode (as utf-8) to the expected text.
    """
    class FakeUnicodeDataHandler:
        # Stand-in for the unicodedata lookup: only the single name used
        # in the test input is accepted.
        def call(self, name):
            assert name == "QUESTION MARK"
            return ord("?")

    handler = FakeUnicodeDataHandler()
    expected = u"äҰ𐀂?"
    data = b'\\xe4\\u04b0\\U00010002\\N{QUESTION MARK}'
    total = len(data)

    for split in range(1, total):
        # First pass: non-final decode of the prefix only.
        head = data[:split]
        out_head, used_head, _ = str_decode_unicode_escape(
            head, 'strict', False, None, handler)

        # Second pass: final decode of whatever the first pass left over,
        # followed by the rest of the input.
        tail = data[used_head:split] + data[split:]
        out_tail, used_tail, _ = str_decode_unicode_escape(
            tail, 'strict', True, None, handler)

        assert used_head + used_tail == total
        assert expected == (out_head + out_tail).decode("utf-8")
Example #3
0
def unicode_escape_decode(space, string, errors="strict", w_final=None):
    """Decode a unicode-escape string and return a wrapped 2-tuple.

    ``string`` is passed straight to
    ``unicodehelper.str_decode_unicode_escape`` together with the codec
    state's decode error handler and unicodedata handler.  ``errors``
    falls back to ``'strict'`` when it is None, and ``w_final`` is
    evaluated for truthiness through the object space.

    The result is ``(text, consumed)``: ``text`` is built with
    ``space.newutf8`` from the decoded string and the decoder's third
    result, and ``consumed`` is the decoder's second result wrapped as an
    int.
    """
    from pypy.interpreter import unicodehelper

    if errors is None:
        errors = 'strict'
    is_final = space.is_true(w_final)
    codec_state = space.fromcache(CodecState)
    name_handler = codec_state.get_unicodedata_handler(space)

    utf8, consumed, length = unicodehelper.str_decode_unicode_escape(
        string, errors, is_final, codec_state.decode_error_handler,
        name_handler)

    w_text = space.newutf8(utf8, length)
    w_consumed = space.newint(consumed)
    return space.newtuple([w_text, w_consumed])