def test_guess_encoding_ascii(self):
    """
    Pure-ASCII bytes must be guessed as the ``ascii`` encoding.
    """
    text = u'Twas bryllyg, and the slythy toves did gyre and gymble'
    ascii_bytes = text.encode('ascii')
    guessed = encoding_utils.guess_encoding(ascii_bytes)
    self.assertEqual(guessed, 'ascii')
def test_guess_encoding_favor_utf_8(self):
    """
    Test that strings that could be UTF-8 or ISO-8859-* result in UTF-8.

    The raw chardet guess is asserted too, to show that guess_encoding()
    overrides it. That raw guess depends on the installed chardet release:
    3.x (e.g. 3.0.4) reports ISO-8859-9, while older releases (e.g. 2.2.1)
    report ISO-8859-2.
    """
    data = u'Šabata'.encode('utf-8')
    result = encoding_utils.guess_encoding(data)
    chardet_result = chardet.detect(data)
    self.assertEqual(result, 'utf-8')
    # A hard-coded 'ISO-8859-2' fails under chardet 3.x, so pick the
    # expectation from the major version of the installed library.
    if chardet.__version__[0] == '3':
        expected_chardet_guess = 'ISO-8859-9'
    else:
        expected_chardet_guess = 'ISO-8859-2'
    self.assertEqual(chardet_result['encoding'], expected_chardet_guess)
def test_guess_encoding_favor_utf_8(self):
    """
    Check that UTF-8 wins when the bytes are also plausible ISO-8859-*.

    chardet on its own picks a different encoding for the sample, and which
    one depends on the installed release:
    python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
    python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
    """
    encoded = 'Šabata'.encode('utf-8')
    guessed = encoding_utils.guess_encoding(encoded)
    detected = chardet.detect(encoded)
    self.assertEqual(guessed, 'utf-8')
    # Only the first character of the version string is inspected.
    chardet_guess = (
        'ISO-8859-9' if chardet.__version__[0] == '3' else 'ISO-8859-2'
    )
    self.assertEqual(detected['encoding'], chardet_guess)
def test_guess_encoding_favor_utf_8(self):
    """
    Ambiguous UTF-8 / ISO-8859-* bytes must be reported as UTF-8.

    The plain chardet answer for the same bytes is asserted as well; it
    varies with the packaged release:
    python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
    python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
    """
    sample = "Šabata".encode("utf-8")
    guessed = encoding_utils.guess_encoding(sample)
    raw_detection = chardet.detect(sample)
    self.assertEqual(guessed, "utf-8")
    # Map the leading version character to the encoding chardet reports;
    # anything other than "3" falls back to the older ISO-8859-2 answer.
    by_major_version = {"3": "ISO-8859-9"}
    expected = by_major_version.get(chardet.__version__[0], "ISO-8859-2")
    self.assertEqual(raw_detection["encoding"], expected)
def test_guess_encoding_no_data(self):
    """
    An empty byte string is expected to be guessed as ``ascii``.
    """
    empty_bytes = u''.encode('utf-8')
    guessed = encoding_utils.guess_encoding(empty_bytes)
    self.assertEqual(guessed, 'ascii')
def test_guess_encoding_no_data(self):
    """
    Verify that guess_encoding() answers ``ascii`` for empty input.
    """
    no_data = ''.encode('utf-8')
    guessed = encoding_utils.guess_encoding(no_data)
    self.assertEqual(guessed, 'ascii')
def test_guess_encoding_no_data(self):
    """
    With nothing to inspect, encoding_utils.guess_encoding() should
    return ``ascii``.
    """
    payload = u''.encode('utf-8')
    encoding_name = encoding_utils.guess_encoding(payload)
    self.assertEqual(encoding_name, 'ascii')
def test_guess_encoding_no_data(self):
    """
    Feed encoding_utils.guess_encoding() zero bytes and expect ``ascii``.
    """
    zero_bytes = "".encode("utf-8")
    detected = encoding_utils.guess_encoding(zero_bytes)
    self.assertEqual(detected, "ascii")