def test_detect_encoding(self):
    """Check detect_encoding against known samples and malformed input.

    Covers samples with a matching encoding hint, bytes with no hint,
    an empty fallback list, HTML documents declaring unknown charsets,
    and a no-crash sweep over every possible single-byte input.
    """
    # Samples that must be reported as the hinted encoding.
    mojibake = b'\x95\xb6\x8e\x9a\x89\xbb\x82\xaf'
    krakozyabry = b'\xeb\xd2\xc1\xcb\xcf\xda\xd1\xc2\xd2\xd9'

    self.assertEqual('shift_jis', detect_encoding(mojibake, 'shift_jis'))
    self.assertEqual('koi8-r', detect_encoding(krakozyabry, 'koi8-r'))

    # Without a hint these bytes are expected to come back as Latin-1.
    self.assertEqual('iso8859-1', detect_encoding(b'\xff\xff\xff\x81'))

    # With the 'utf8' hint and an empty fallback the same bytes must
    # raise instead of returning a guess.
    self.assertRaises(
        ValueError,
        detect_encoding, b'\xff\xff\xff\x81', 'utf8', fallback=())

    # HTML whose <meta> names a non-existent charset.
    self.assertEqual(
        'ascii',
        detect_encoding(
            b'<html><meta charset="dog_breath"><body>', is_html=True))

    # Same idea, but inside an unterminated content="..." attribute.
    self.assertEqual(
        'ascii',
        detect_encoding(
            b'<html><meta content="text/html; charset=cat-meows><body>',
            is_html=True))

    # No-crash sweep.  bytes([value]) is the one-byte string holding
    # `value`; bytes(value) would instead build `value` zero bytes, so
    # no non-NUL byte would ever be exercised.
    single_bytes = [bytes([value]) for value in range(256)]

    for length in range(1, 2):
        for data in itertools.permutations(single_bytes, length):
            detect_encoding(b''.join(data))
def test_detect_encoding(self):
    """Check detect_encoding against known samples and malformed input.

    Covers samples with a matching encoding hint, bytes with no hint,
    an empty fallback list, HTML documents declaring unknown charsets,
    and a no-crash sweep over every possible single-byte input.
    """
    # Samples that must be reported as the hinted encoding.
    mojibake = b'\x95\xb6\x8e\x9a\x89\xbb\x82\xaf'
    krakozyabry = b'\xeb\xd2\xc1\xcb\xcf\xda\xd1\xc2\xd2\xd9'

    self.assertEqual(
        'shift_jis',
        detect_encoding(mojibake, 'shift_jis')
    )
    self.assertEqual(
        'koi8-r',
        detect_encoding(krakozyabry, 'koi8-r')
    )

    # Without a hint these bytes are expected to come back as Latin-1.
    self.assertEqual(
        'iso8859-1',
        detect_encoding(b'\xff\xff\xff\x81')
    )

    # With the 'utf8' hint and an empty fallback the same bytes must
    # raise instead of returning a guess.
    self.assertRaises(
        ValueError,
        detect_encoding, b'\xff\xff\xff\x81', 'utf8', fallback=()
    )

    # HTML whose <meta> names a non-existent charset.
    self.assertEqual(
        'ascii',
        detect_encoding(
            b'<html><meta charset="dog_breath"><body>',
            is_html=True
        )
    )

    # Same idea, but inside an unterminated content="..." attribute.
    self.assertEqual(
        'ascii',
        detect_encoding(
            b'<html><meta content="text/html; charset=cat-meows><body>',
            is_html=True
        )
    )

    # No-crash sweep.  bytes([value]) is the one-byte string holding
    # `value`; bytes(value) would instead build `value` zero bytes, so
    # no non-NUL byte would ever be exercised.
    single_bytes = [bytes([value]) for value in range(256)]

    for length in range(1, 2):
        iterable = itertools.permutations(single_bytes, length)

        for data in iterable:
            detect_encoding(b''.join(data))
def test_detect_encoding(self):
    """Check detect_encoding against known samples and malformed input.

    Covers whole and truncated samples with a matching encoding hint,
    bytes with no hint, an empty fallback list, HTML documents
    declaring unknown charsets, BOM-prefixed UTF-16 data, an XML
    declaration with a junk encoding name, and a no-crash sweep over
    every possible single-byte input.
    """
    # Samples that must be reported as the hinted encoding.
    mojibake = b'\x95\xb6\x8e\x9a\x89\xbb\x82\xaf'
    krakozyabry = b'\xeb\xd2\xc1\xcb\xcf\xda\xd1\xc2\xd2\xd9'

    self.assertEqual(
        'shift_jis',
        detect_encoding(mojibake, 'shift_jis')
    )
    self.assertEqual(
        'koi8-r',
        detect_encoding(krakozyabry, 'koi8-r')
    )

    # Repeated samples with the final byte chopped off must still be
    # reported as the hinted encoding.
    self.assertEqual(
        'shift_jis',
        detect_encoding((mojibake * 10)[:-1], 'shift_jis')
    )
    self.assertEqual(
        'koi8-r',
        detect_encoding((krakozyabry * 10)[:-1], 'koi8-r')
    )

    # Without a hint these bytes are expected to come back as Latin-1.
    self.assertEqual(
        'iso8859-1',
        detect_encoding(b'\xff\xff\xff\x81')
    )

    # With the 'utf8' hint and an empty fallback the same bytes must
    # raise instead of returning a guess.
    self.assertRaises(
        ValueError,
        detect_encoding, b'\xff\xff\xff\x81', 'utf8', fallback=()
    )

    # HTML whose <meta> names a non-existent charset.
    self.assertEqual(
        'utf-8',
        detect_encoding(
            b'<html><meta charset="dog_breath"><body>',
            is_html=True
        )
    )

    # Same idea, but inside an unterminated content="..." attribute.
    self.assertEqual(
        'utf-8',
        detect_encoding(
            b'<html><meta content="text/html; charset=cat-meows><body>',
            is_html=True
        )
    )

    # UTF-16-LE text behind a BOM, with the last byte removed so the
    # final code unit is incomplete.
    self.assertEqual(
        'utf-16-le',
        detect_encoding(
            codecs.BOM_UTF16_LE +
            'Let’s hope no one uses UTF-36'.encode('utf_16_le')[:-1]
        )
    )

    # Check for no crash
    detect_encoding(
        b'<?xml version="1.0" encoding="UTF-\xdb" ?>'
    )

    # No-crash sweep.  bytes([value]) is the one-byte string holding
    # `value`; bytes(value) would instead build `value` zero bytes, so
    # no non-NUL byte would ever be exercised.
    single_bytes = [bytes([value]) for value in range(256)]

    for length in range(1, 2):
        iterable = itertools.permutations(single_bytes, length)

        for data in iterable:
            detect_encoding(b''.join(data))
def test_detect_encoding(self):
    """Check detect_encoding against known samples and malformed input.

    Covers whole and truncated samples with a matching encoding hint,
    bytes with no hint, an empty fallback list, HTML documents
    declaring unknown charsets, BOM-prefixed UTF-16 data, an XML
    declaration with a junk encoding name, and a no-crash sweep over
    every possible single-byte input.
    """
    # Samples that must be reported as the hinted encoding.
    mojibake = b'\x95\xb6\x8e\x9a\x89\xbb\x82\xaf'
    krakozyabry = b'\xeb\xd2\xc1\xcb\xcf\xda\xd1\xc2\xd2\xd9'

    self.assertEqual('shift_jis', detect_encoding(mojibake, 'shift_jis'))
    self.assertEqual('koi8-r', detect_encoding(krakozyabry, 'koi8-r'))

    # Repeated samples with the final byte chopped off must still be
    # reported as the hinted encoding.
    self.assertEqual(
        'shift_jis',
        detect_encoding((mojibake * 10)[:-1], 'shift_jis'))
    self.assertEqual(
        'koi8-r',
        detect_encoding((krakozyabry * 10)[:-1], 'koi8-r'))

    # Without a hint these bytes are expected to come back as Latin-1.
    self.assertEqual('iso8859-1', detect_encoding(b'\xff\xff\xff\x81'))

    # With the 'utf8' hint and an empty fallback the same bytes must
    # raise instead of returning a guess.
    self.assertRaises(
        ValueError,
        detect_encoding, b'\xff\xff\xff\x81', 'utf8', fallback=())

    # HTML whose <meta> names a non-existent charset.
    self.assertEqual(
        'utf-8',
        detect_encoding(
            b'<html><meta charset="dog_breath"><body>', is_html=True))

    # Same idea, but inside an unterminated content="..." attribute.
    self.assertEqual(
        'utf-8',
        detect_encoding(
            b'<html><meta content="text/html; charset=cat-meows><body>',
            is_html=True))

    # UTF-16-LE text behind a BOM, with the last byte removed so the
    # final code unit is incomplete.
    self.assertEqual(
        'utf-16-le',
        detect_encoding(
            codecs.BOM_UTF16_LE +
            'Let’s hope no one uses UTF-36'.encode('utf_16_le')[:-1]))

    # Check for no crash
    detect_encoding(b'<?xml version="1.0" encoding="UTF-\xdb" ?>')

    # No-crash sweep.  bytes([value]) is the one-byte string holding
    # `value`; bytes(value) would instead build `value` zero bytes, so
    # no non-NUL byte would ever be exercised.
    single_bytes = [bytes([value]) for value in range(256)]

    for length in range(1, 2):
        for data in itertools.permutations(single_bytes, length):
            detect_encoding(b''.join(data))