Example #1
0
def test_lookup_error():
    '''
    Exercise codecs.register_error / codecs.lookup_error: a name is looked
    up before registration, then two handlers are registered under distinct
    names and each lookup is compared against the callable just registered.
    '''
    #sanity
    # NOTE(review): AssertError presumably checks that the call raises
    # LookupError for a not-yet-registered name -- confirm against the helper.
    AssertError(LookupError, codecs.lookup_error, "blah garbage xyz")

    def first_handler(someError):
        pass

    codecs.register_error("blah garbage xyz", first_handler)
    AreEqual(codecs.lookup_error("blah garbage xyz"), first_handler)

    def second_handler(someError):
        pass

    codecs.register_error("some other", second_handler)
    AreEqual(codecs.lookup_error("some other"), second_handler)
Example #2
0
def test_lookup_error():
    '''
    Register a do-nothing error handler under each of two names and check
    that codecs.lookup_error hands back the very callable that was
    registered for that name.
    '''
    #sanity
    # NOTE(review): AssertError presumably verifies that looking up an
    # unregistered name raises LookupError -- confirm against the helper.
    AssertError(LookupError, codecs.lookup_error, "blah garbage xyz")

    for handler_name in ("blah garbage xyz", "some other"):
        # A fresh function object per name, so each lookup is compared
        # against the handler registered in this iteration only.
        def handler(someError):
            pass

        codecs.register_error(handler_name, handler)
        AreEqual(codecs.lookup_error(handler_name), handler)
Example #3
0
def unicode_call_errorhandler(errors,
                              encoding,
                              reason,
                              input,
                              startinpos,
                              endinpos,
                              decode=True):
    """Call the codec error handler registered as *errors* and validate its result.

    A UnicodeDecodeError (when *decode* is true) or UnicodeEncodeError
    (otherwise) is built from *encoding*, *input*, *startinpos*, *endinpos*
    and *reason*, and passed to the handler found via _codecs.lookup_error.

    Returns:
        A ``(replacement, newpos)`` pair, where a negative position returned
        by the handler has been made relative to ``len(input)``.

    Raises:
        LookupError: if no handler is registered under *errors*.
        TypeError: if the handler does not return a 2-tuple of
            ``(unicode, int)``.
        IndexError: if the resulting position lies outside
            ``0..len(input)``.
        Any exception raised by the handler itself is propagated.

    NOTE(review): ``unicode`` is not defined in this block; it is assumed to
    be supplied by the surrounding module or interpreter -- confirm.
    """

    import _codecs
    errorHandler = _codecs.lookup_error(errors)
    if decode:
        exceptionObject = UnicodeDecodeError(encoding, input, startinpos,
                                             endinpos, reason)
    else:
        exceptionObject = UnicodeEncodeError(encoding, input, startinpos,
                                             endinpos, reason)
    res = errorHandler(exceptionObject)
    # Check the length before indexing: an empty or 1-element tuple must be
    # reported as a bad handler result (TypeError), not as a stray
    # IndexError coming from res[0] / res[1].
    if (isinstance(res, tuple) and len(res) == 2
            and isinstance(res[0], unicode) and isinstance(res[1], int)):
        newpos = res[1]
        if newpos < 0:
            # Negative positions count from the end of the input.
            newpos = len(input) + newpos
        if newpos < 0 or newpos > len(input):
            raise IndexError("position %d from error handler out of bounds" %
                             newpos)
        return res[0], newpos
    else:
        raise TypeError(
            "encoding error handler must return (unicode, int) tuple, not %s" %
            repr(res))
Example #4
0
 def test_surrogatepass_handler(self):
     """Check the "surrogatepass" handler with UTF-8.

     Lone surrogates must encode to their 3-byte form and decode back,
     while truncated or malformed surrogate byte sequences must still fail
     with UnicodeDecodeError.
     """
     import _codecs
     assert _codecs.lookup_error("surrogatepass")

     # (text containing a lone surrogate, its UTF-8 surrogatepass encoding)
     round_trips = [
         ("abc\ud800def", b"abc\xed\xa0\x80def"),
         ('surrogate:\udcff', b'surrogate:\xed\xb3\xbf'),
     ]
     for text, encoded in round_trips:
         assert text.encode("utf-8", "surrogatepass") == encoded
         assert encoded.decode("utf-8", "surrogatepass") == text

     # Incomplete / invalid surrogate sequences are not rescued by the handler.
     for broken in (b"abc\xed\xa0", b"abc\xed\xa0z"):
         raises(UnicodeDecodeError, broken.decode, "utf-8", "surrogatepass")
Example #5
0
 def test_surrogatepass_handler(self):
     """Round-trip lone surrogates through UTF-8 using "surrogatepass".

     Also checks that byte strings with a cut-off or invalid surrogate
     sequence raise UnicodeDecodeError even with this handler.
     """
     import _codecs
     assert _codecs.lookup_error("surrogatepass")

     high_text = "abc\ud800def"
     high_bytes = b"abc\xed\xa0\x80def"
     assert high_text.encode("utf-8", "surrogatepass") == high_bytes
     assert high_bytes.decode("utf-8", "surrogatepass") == high_text

     low_text = 'surrogate:\udcff'
     low_bytes = b'surrogate:\xed\xb3\xbf'
     assert low_text.encode("utf-8", "surrogatepass") == low_bytes
     assert low_bytes.decode("utf-8", "surrogatepass") == low_text

     truncated = b"abc\xed\xa0"
     raises(UnicodeDecodeError, truncated.decode, "utf-8", "surrogatepass")
     malformed = b"abc\xed\xa0z"
     raises(UnicodeDecodeError, malformed.decode, "utf-8", "surrogatepass")
 def test_surrogatepass_handler(self):
     """Check "surrogatepass" for UTF-8 round trips and UTF-16/32 encoding.

     UTF-8: lone surrogates encode and decode symmetrically, and broken
     surrogate byte sequences still raise UnicodeDecodeError.
     UTF-16-BE / UTF-32-BE: a lone surrogate encodes to its raw code unit.
     """
     import _codecs
     assert _codecs.lookup_error("surrogatepass")

     handler = "surrogatepass"
     utf8_pairs = (
         ("abc\ud800def", b"abc\xed\xa0\x80def"),
         ('surrogate:\udcff', b'surrogate:\xed\xb3\xbf'),
     )
     for decoded, raw in utf8_pairs:
         assert decoded.encode("utf-8", handler) == raw
         assert raw.decode("utf-8", handler) == decoded

     for bad_input in (b"abc\xed\xa0", b"abc\xed\xa0z"):
         raises(UnicodeDecodeError, bad_input.decode, "utf-8", handler)

     # Wider encodings emit the surrogate code point unchanged.
     utf16_result = u'\ud8ae'.encode('utf_16_be', handler)
     assert utf16_result == b'\xd8\xae'
     utf32_result = u'\U0000d8ae'.encode('utf-32-be', handler)
     assert utf32_result == b'\x00\x00\xd8\xae'
Example #7
0
def unicode_call_errorhandler(errors, encoding, reason, input, startinpos,
                              endinpos, decode=True):
    """Run the error handler registered as *errors* and check what it returns.

    The handler receives a UnicodeDecodeError when *decode* is true and a
    UnicodeEncodeError otherwise, built from the remaining arguments.

    Returns the handler's ``(replacement, position)`` pair, with a negative
    position converted to an offset from the end of *input*.

    Raises TypeError when the handler result is not a 2-tuple of
    ``(unicode, int)`` and IndexError when the position falls outside
    ``0..len(input)``.
    """
    import _codecs
    handler = _codecs.lookup_error(errors)
    exc_type = UnicodeDecodeError if decode else UnicodeEncodeError
    outcome = handler(exc_type(encoding, input, startinpos, endinpos, reason))

    # Reject anything that is not exactly (unicode, int) before unpacking.
    well_formed = (isinstance(outcome, tuple)
                   and len(outcome) == 2
                   and isinstance(outcome[0], unicode)
                   and isinstance(outcome[1], int))
    if not well_formed:
        raise TypeError("encoding error handler must return (unicode, int) tuple, not %s" % repr(outcome))

    position = outcome[1]
    if position < 0:
        # Negative positions are relative to the end of the input.
        position += len(input)
    if not 0 <= position <= len(input):
        raise IndexError( "position %d from error handler out of bounds" % position)
    return outcome[0], position
Example #8
0
        multiple character to \\u001a.

    """
    m = {}
    for k, v in decoding_map.items():
        if not v in m:
            m[v] = k
        else:
            m[v] = None
    return m


### error handlers

try:
    # Resolve every built-in error handler in one go; the names are looked
    # up in the same order as the module-level variables they are bound to.
    (strict_errors,
     ignore_errors,
     replace_errors,
     xmlcharrefreplace_errors,
     backslashreplace_errors,
     namereplace_errors) = [
        lookup_error(handler_name)
        for handler_name in ("strict", "ignore", "replace",
                             "xmlcharrefreplace", "backslashreplace",
                             "namereplace")
    ]
except LookupError:
    # In --disable-unicode builds, these error handlers are missing;
    # expose every name as None so the attributes still exist.
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None
    namereplace_errors = None

# Tell modulefinder that using codecs probably needs the encodings