def test_guess_encoding_ascii(self):
     """
     Pure ASCII input must be reported as 'ascii' by guess_encoding().
     """
     ascii_text = u'Twas bryllyg, and the slythy toves did gyre and gymble'
     ascii_bytes = ascii_text.encode('ascii')
     guessed = encoding_utils.guess_encoding(ascii_bytes)
     self.assertEqual(guessed, 'ascii')
Beispiel #2
0
 def test_guess_encoding_ascii(self):
     """
     Feed guess_encoding() bytes that contain only ASCII characters and
     check that the encoding it reports is 'ascii'.
     """
     payload = u'Twas bryllyg, and the slythy toves did gyre and gymble'.encode(
         'ascii')
     detected = encoding_utils.guess_encoding(payload)
     self.assertEqual(detected, 'ascii')
 def test_guess_encoding_favor_utf_8(self):
     """
     Test that strings that could be UTF-8 or ISO-8859-* result in UTF-8.

     The raw chardet guess for this input depends on the chardet release:
     python-chardet-3.0.4 detects it as ISO-8859-9
     python-chardet-2.2.1 detects it as ISO-8859-2
     """
     data = u'Šabata'.encode('utf-8')
     result = encoding_utils.guess_encoding(data)
     chardet_result = chardet.detect(data)
     self.assertEqual(result, 'utf-8')
     # Pinning one fixed chardet answer would make this test fail on
     # chardet 3.x, so the expectation is chosen by major version.
     if chardet.__version__[0] == '3':
         self.assertEqual(chardet_result['encoding'], 'ISO-8859-9')
     else:
         self.assertEqual(chardet_result['encoding'], 'ISO-8859-2')
Beispiel #4
0
 def test_guess_encoding_favor_utf_8(self):
     """
     Test that strings that could be UTF-8 or ISO-8859-* result in UTF-8.

     chardet on its own does not report UTF-8 for this input, and what it
     does report varies by release:
     python-chardet-3.0.4 detects it as ISO-8859-9
     python-chardet-2.2.1 detects it as ISO-8859-2
     """
     data = u'Šabata'.encode('utf-8')
     result = encoding_utils.guess_encoding(data)
     chardet_result = chardet.detect(data)
     self.assertEqual(result, 'utf-8')
     # A single hard-coded chardet expectation breaks on chardet 3.x;
     # select the expected raw guess from the installed major version.
     if chardet.__version__[0] == '3':
         expected_raw_guess = 'ISO-8859-9'
     else:
         expected_raw_guess = 'ISO-8859-2'
     self.assertEqual(chardet_result['encoding'], expected_raw_guess)
Beispiel #5
0
    def test_guess_encoding_favor_utf_8(self):
        """
        Check that guess_encoding() prefers UTF-8 when the bytes could also
        be read as an ISO-8859-* encoding.

        The bare chardet guess differs between releases:
        python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
        python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
        """
        raw = 'Šabata'.encode('utf-8')
        guessed = encoding_utils.guess_encoding(raw)
        detected = chardet.detect(raw)
        self.assertEqual(guessed, 'utf-8')
        # Only the 3.x series is expected to answer ISO-8859-9.
        on_chardet_3 = chardet.__version__[0] == '3'
        expected = 'ISO-8859-9' if on_chardet_3 else 'ISO-8859-2'
        self.assertEqual(detected['encoding'], expected)
    def test_guess_encoding_favor_utf_8(self):
        """
        Ambiguous input (valid as UTF-8 or ISO-8859-*) must be guessed as
        UTF-8 by guess_encoding(), whatever chardet itself reports.

        python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
        python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
        """
        encoded = "Šabata".encode("utf-8")
        our_guess = encoding_utils.guess_encoding(encoded)
        chardet_guess = chardet.detect(encoded)
        self.assertEqual(our_guess, "utf-8")
        # Map the chardet major version digit to its known raw answer;
        # anything other than "3" falls back to the 2.x expectation.
        raw_answer_by_major = {"3": "ISO-8859-9"}
        expected_raw = raw_answer_by_major.get(
            chardet.__version__[0], "ISO-8859-2"
        )
        self.assertEqual(chardet_guess["encoding"], expected_raw)
    def test_guess_encoding_favor_utf_8(self):
        """
        Verify UTF-8 wins over ISO-8859-* for bytes that are valid in both.

        Raw chardet results recorded for this input:
        python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
        python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
        """
        sample = "Šabata".encode("utf-8")
        guess = encoding_utils.guess_encoding(sample)
        raw_detection = chardet.detect(sample)
        self.assertEqual(guess, "utf-8")
        # First character of the version string selects the expectation.
        major = chardet.__version__[0]
        if major != "3":
            self.assertEqual(raw_detection["encoding"], "ISO-8859-2")
            return
        self.assertEqual(raw_detection["encoding"], "ISO-8859-9")
 def test_guess_encoding_no_data(self):
     """ Check that guess_encoding() reports 'ascii' for empty input """
     empty_bytes = u''.encode('utf-8')
     guessed = encoding_utils.guess_encoding(empty_bytes)
     self.assertEqual(guessed, 'ascii')
Beispiel #9
0
 def test_guess_encoding_no_data(self):
     """ An empty byte string passed to guess_encoding() yields 'ascii' """
     no_data = ''.encode('utf-8')
     detected = encoding_utils.guess_encoding(no_data)
     self.assertEqual(detected, 'ascii')
Beispiel #10
0
 def test_guess_encoding_no_data(self):
     """ Exercise encoding_utils.guess_encoding() on zero bytes of input """
     payload = u''.encode('utf-8')
     outcome = encoding_utils.guess_encoding(payload)
     self.assertEqual(outcome, 'ascii')
 def test_guess_encoding_no_data(self):
     """ guess_encoding() is expected to answer "ascii" for empty data """
     empty_input = "".encode("utf-8")
     guess = encoding_utils.guess_encoding(empty_input)
     self.assertEqual(guess, "ascii")
 def test_guess_encoding_no_data(self):
     """ Empty-string case: the guessed encoding should be "ascii" """
     encoded_empty = "".encode("utf-8")
     reported = encoding_utils.guess_encoding(encoded_empty)
     self.assertEqual(reported, "ascii")