Example #1
0
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
     )
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
     )
     # Use the correct exception
     tests = [
         ("\u3042", "\\u3042"),
         ("\n", "\\x0a"),
         ("a", "\\x61"),
         ("\x00", "\\x00"),
         ("\xff", "\\xff"),
         ("\u0100", "\\u0100"),
         ("\uffff", "\\uffff"),
         ("\U00010000", "\\U00010000"),
         ("\U0010ffff", "\\U0010ffff"),
         # Lone surrogates
         ("\ud800", "\\ud800"),
         ("\udfff", "\\udfff"),
         ("\ud800\udfff", "\\ud800\\udfff"),
     ]
     for s, r in tests:
         with self.subTest(str=s):
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeEncodeError("ascii", "a" + s + "b",
                                        1, 1 + len(s), "ouch")),
                 (r, 1 + len(s))
             )
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeTranslateError("a" + s + "b",
                                           1, 1 + len(s), "ouch")),
                 (r, 1 + len(s))
             )
     tests = [
         (b"a", "\\x61"),
         (b"\n", "\\x0a"),
         (b"\x00", "\\x00"),
         (b"\xff", "\\xff"),
     ]
     for b, r in tests:
         with self.subTest(bytes=b):
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"),
                                        1, 2, "ouch")),
                 (r, 2)
             )
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
     )
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
     )
     # Use the correct exception
     tests = [
         ("\u3042", "\\u3042"),
         ("\n", "\\x0a"),
         ("a", "\\x61"),
         ("\x00", "\\x00"),
         ("\xff", "\\xff"),
         ("\u0100", "\\u0100"),
         ("\uffff", "\\uffff"),
         ("\U00010000", "\\U00010000"),
         ("\U0010ffff", "\\U0010ffff"),
         # Lone surrogates
         ("\ud800", "\\ud800"),
         ("\udfff", "\\udfff"),
         ("\ud800\udfff", "\\ud800\\udfff"),
     ]
     for s, r in tests:
         with self.subTest(str=s):
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeEncodeError("ascii", "a" + s + "b",
                                        1, 1 + len(s), "ouch")),
                 (r, 1 + len(s))
             )
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeTranslateError("a" + s + "b",
                                           1, 1 + len(s), "ouch")),
                 (r, 1 + len(s))
             )
     tests = [
         (b"a", "\\x61"),
         (b"\n", "\\x0a"),
         (b"\x00", "\\x00"),
         (b"\xff", "\\xff"),
     ]
     for b, r in tests:
         with self.subTest(bytes=b):
             self.assertEqual(
                 codecs.backslashreplace_errors(
                     UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"),
                                        1, 2, "ouch")),
                 (r, 2)
             )
Example #3
0
def backslashescape_errors(exception):
    """Codec error handler that backslash-escapes the offending data.

    For a ``UnicodeDecodeError`` the items in ``exception.object`` between
    ``exception.start`` and ``exception.end`` are each rendered as ``\\xNN``
    (two lowercase hex digits). Any other exception is handed to
    ``codecs.backslashreplace_errors`` unchanged.

    Returns a ``(replacement, end)`` tuple for decode errors, or whatever
    ``codecs.backslashreplace_errors`` returns otherwise.
    """
    if not isinstance(exception, UnicodeDecodeError):
        return codecs.backslashreplace_errors(exception)

    offending = exception.object[exception.start:exception.end]
    pieces = [r"\x%02x" % item for item in offending]
    return "".join(pieces), exception.end
Example #4
0
def escape_invis(decode_error):
    """Error handler that escapes one item only if CONTROL_CHARS matches it.

    The error range is first shrunk in place to a single item starting at
    ``decode_error.start``. If ``CONTROL_CHARS.match`` accepts that item,
    the result of ``codecs.backslashreplace_errors`` is returned; otherwise
    the item itself, encoded as UTF-8, is returned together with the
    (shrunk) end index.

    NOTE(review): CONTROL_CHARS is defined outside this block — presumably
    a compiled regular expression; confirm. The slice is ``.encode()``d, so
    ``decode_error.object`` is presumably ``str`` despite the parameter
    name; verify against the caller.
    """
    # Mutates the exception: only one item is handled per call.
    decode_error.end = decode_error.start + 1
    segment = decode_error.object[decode_error.start:decode_error.end]
    if not CONTROL_CHARS.match(segment):
        return segment.encode('utf-8'), decode_error.end
    return codecs.backslashreplace_errors(decode_error)
Example #5
0
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(TypeError, codecs.backslashreplace_errors, 42)
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(TypeError, codecs.backslashreplace_errors,
                       UnicodeError("ouch"))
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError, codecs.backslashreplace_errors,
         UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch"))
     self.assertRaises(TypeError, codecs.backslashreplace_errors,
                       UnicodeTranslateError("\u3042", 0, 1, "ouch"))
     # Use the correct exception
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
         ("\\u3042", 1))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
         ("\\x00", 1))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
         ("\\xff", 1))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
         ("\\u0100", 1))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
         ("\\uffff", 1))
     # 1 on UCS-4 builds, 2 on UCS-2
     len_wide = len("\U00010000")
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\U00010000", 0, len_wide,
                                "ouch")), ("\\U00010000", len_wide))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\U0010ffff", 0, len_wide,
                                "ouch")), ("\\U0010ffff", len_wide))
     # Lone surrogates (regardless of unicode width)
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
         ("\\ud800", 1))
     self.assertEqual(
         codecs.backslashreplace_errors(
             UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
         ("\\udfff", 1))
Example #6
0
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(TypeError, codecs.backslashreplace_errors, 42)
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeError("ouch"))
     # "backslashreplace" can only be used for encoding
     self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch"))
     self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeTranslateError(u"\u3042", 0, 1, "ouch"))
     # Use the correct exception
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), (u"\\u3042", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), (u"\\x00", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), (u"\\xff", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), (u"\\u0100", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), (u"\\uffff", 1)
     )
     if sys.maxunicode > 0xFFFF:
         self.assertEquals(
             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
             (u"\\U00010000", 1),
         )
         self.assertEquals(
             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
             (u"\\U0010ffff", 1),
         )
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
     )
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
     )
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
     )
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
     )
     # Use the correct exception
     tests = [
         (u"\u3042", u"\\u3042"),
         (u"\n", u"\\x0a"),
         (u"a", u"\\x61"),
         (u"\x00", u"\\x00"),
         (u"\xff", u"\\xff"),
         (u"\u0100", u"\\u0100"),
         (u"\uffff", u"\\uffff"),
         # Lone surrogates
         (u"\ud800", u"\\ud800"),
         (u"\udfff", u"\\udfff"),
     ]
     if sys.maxunicode > 0xffff:
         tests += [
             (u"\U00010000", u"\\U00010000"),
             (u"\U0010ffff", u"\\U0010ffff"),
         ]
     else:
         tests += [
             (u"\U00010000", u"\\ud800\\udc00"),
             (u"\U0010ffff", u"\\udbff\\udfff"),
         ]
     for s, r in tests:
         self.assertEqual(
             codecs.backslashreplace_errors(
                 UnicodeEncodeError("ascii", u"a" + s + u"b",
                                    1, 1 + len(s), "ouch")),
             (r, 1 + len(s))
         )
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
     )
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
     )
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
     )
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
     )
     # Use the correct exception
     tests = [
         (u"\u3042", u"\\u3042"),
         (u"\n", u"\\x0a"),
         (u"a", u"\\x61"),
         (u"\x00", u"\\x00"),
         (u"\xff", u"\\xff"),
         (u"\u0100", u"\\u0100"),
         (u"\uffff", u"\\uffff"),
         # Lone surrogates
         (u"\ud800", u"\\ud800"),
         (u"\udfff", u"\\udfff"),
     ]
     if sys.maxunicode > 0xffff:
         tests += [
             (u"\U00010000", u"\\U00010000"),
             (u"\U0010ffff", u"\\U0010ffff"),
         ]
     else:
         tests += [
             (u"\U00010000", u"\\ud800\\udc00"),
             (u"\U0010ffff", u"\\udbff\\udfff"),
         ]
     for s, r in tests:
         self.assertEqual(
             codecs.backslashreplace_errors(
                 UnicodeEncodeError("ascii", u"a" + s + u"b",
                                    1, 1 + len(s), "ouch")),
             (r, 1 + len(s))
         )
Example #9
0
 def test_badandgoodbackslashreplaceexceptions(self):
     # Checks codecs.backslashreplace_errors: everything except a
     # UnicodeEncodeError must raise TypeError; encode errors must yield
     # the escaped text paired with the end index of the error range.
     # Uses assertEqual throughout: assertEquals is a deprecated alias.
     handler = codecs.backslashreplace_errors

     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(TypeError, handler, 42)
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(TypeError, handler, UnicodeError("ouch"))
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError, handler,
         UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
     )
     self.assertRaises(
         TypeError, handler,
         UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
     )

     # The positive checks below are skipped while the referenced
     # IronPython work item applies.
     if test_support.due_to_ironpython_bug("http://tkbgitvstfat01:8080/WorkItemTracking/WorkItem.aspx?artifactMoniker=303935"):
         return

     # Use the correct exception: (single character, expected escape)
     cases = [
         (u"\u3042", u"\\u3042"),
         (u"\x00", u"\\x00"),
         (u"\xff", u"\\xff"),
         (u"\u0100", u"\\u0100"),
         (u"\uffff", u"\\uffff"),
     ]
     # Characters above U+FFFF are only checked when sys.maxunicode says
     # the build can hold them as a single code unit.
     if sys.maxunicode > 0xffff:
         cases += [
             (u"\U00010000", u"\\U00010000"),
             (u"\U0010ffff", u"\\U0010ffff"),
         ]
     for char, escaped in cases:
         error = UnicodeEncodeError("ascii", char, 0, 1, "ouch")
         self.assertEqual(handler(error), (escaped, 1))
Example #10
0
 def test_badandgoodbackslashreplaceexceptions(self):
     # codecs.backslashreplace_errors must raise TypeError for anything
     # other than a UnicodeEncodeError, and must return a tuple of
     # (escaped text, end index) for encode errors.
     # assertEqual replaces the deprecated assertEquals alias.
     replace = codecs.backslashreplace_errors

     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(TypeError, replace, 42)
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(TypeError, replace, UnicodeError("ouch"))
     # "backslashreplace" can only be used for encoding
     self.assertRaises(TypeError, replace,
                       UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch"))
     self.assertRaises(TypeError, replace,
                       UnicodeTranslateError(u"\u3042", 0, 1, "ouch"))

     # Stop before the positive checks while the referenced IronPython
     # work item applies.
     if test_support.due_to_ironpython_bug("http://tkbgitvstfat01:8080/WorkItemTracking/WorkItem.aspx?artifactMoniker=303935"):
         return

     # Use the correct exception: each entry is one character and the
     # escape expected for it.
     expectations = [
         (u"\u3042", u"\\u3042"),
         (u"\x00", u"\\x00"),
         (u"\xff", u"\\xff"),
         (u"\u0100", u"\\u0100"),
         (u"\uffff", u"\\uffff"),
     ]
     # Only builds whose sys.maxunicode exceeds 0xffff get the checks
     # for characters above U+FFFF.
     if sys.maxunicode > 0xffff:
         expectations.extend([
             (u"\U00010000", u"\\U00010000"),
             (u"\U0010ffff", u"\\U0010ffff"),
         ])
     for chars, wanted in expectations:
         self.assertEqual(
             replace(UnicodeEncodeError("ascii", chars, 0, 1, "ouch")),
             (wanted, 1)
         )
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
     )
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
     )
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
     )
     self.assertRaises(
         TypeError,
         codecs.backslashreplace_errors,
         UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
     )
     # Use the correct exception
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
         (u"\\u3042", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
         (u"\\x00", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
         (u"\\xff", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
         (u"\\u0100", 1)
     )
     self.assertEquals(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
         (u"\\uffff", 1)
     )
     if sys.maxunicode>0xffff:
         self.assertEquals(
             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
             (u"\\U00010000", 1)
         )
         self.assertEquals(
             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
             (u"\\U0010ffff", 1)
         )
 def test_badandgoodbackslashreplaceexceptions(self):
     # "backslashreplace" complains about a non-exception passed in
     self.assertRaises(TypeError, codecs.backslashreplace_errors, 42)
     # "backslashreplace" complains about the wrong exception types
     self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeError("ouch"))
     # "backslashreplace" can only be used for encoding
     self.assertRaises(
         TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
     )
     self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeTranslateError("\u3042", 0, 1, "ouch"))
     # Use the correct exception
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), ("\\u3042", 1)
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), ("\\x00", 1)
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), ("\\xff", 1)
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), ("\\u0100", 1)
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), ("\\uffff", 1)
     )
     # 1 on UCS-4 builds, 2 on UCS-2
     len_wide = len("\U00010000")
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U00010000", 0, len_wide, "ouch")),
         ("\\U00010000", len_wide),
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U0010ffff", 0, len_wide, "ouch")),
         ("\\U0010ffff", len_wide),
     )
     # Lone surrogates (regardless of unicode width)
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), ("\\ud800", 1)
     )
     self.assertEqual(
         codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), ("\\udfff", 1)
     )