Ejemplo n.º 1
0
    def test_strip_tags_flag(self):
        """Check the strip-tags flag round-trips and detection stays UTF-8.

        The same markup is detected once with the flag set to True and once
        with it set to False; in both cases the getter must echo the value
        just set and the detected encoding must be 'UTF-8'.
        """
        markup = '<div ascii_attribute="some more ascii">λ, λ, λ</div>'

        # True first, then False, so the flag is left at False afterwards.
        for flag in (True, False):
            set_strip_tags(flag)
            result = detect(markup)

            self.assertEqual(get_strip_tags(), flag)
            self.assertEqual('UTF-8', result['encoding'])
Ejemplo n.º 2
0
 def test_detection_works_as_expected(self):
     """Compare detect() output against the expected result for each file.

     For every key in ``self.TEST_FILES`` the entry's first element is used
     as the path of the file to read and its second element as the value
     that ``detect`` must return for that file's content.
     """
     for test in self.TEST_FILES:
         file_path = self.TEST_FILES[test][0]
         file_result = self.TEST_FILES[test][1]
         # Use a context manager so the handle is closed on every
         # iteration, even when the read raises.
         with open(file_path) as handle:
             content = handle.read()
         test_result = detect(content)
         self.assertEqual(test_result, file_result)
Ejemplo n.º 3
0
    def test_detection_works_as_expected_2(self):
        """Check detected type and encoding for every fixture file.

        Each item of ``self.FIXTURE_FILES`` unpacks to
        ``(filename, encoding, type_)``. The file is read from the
        ``fixtures/`` directory (relative to the working directory) and the
        result of ``detect`` must report the expected ``'type'`` and
        ``'encoding'``. A result with no ``'encoding'`` key is compared as
        ``None``.
        """
        for fixture in self.FIXTURE_FILES:
            filename, encoding, type_ = fixture

            file_path = 'fixtures/%s' % filename
            # Context manager closes the handle instead of leaking one per
            # fixture.
            with open(file_path) as handle:
                content = handle.read()

            test_result = detect(content)

            self.assertEqual(type_, test_result['type'])

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b); pass/fail outcome is the same.
            self.assertEqual(encoding, test_result.get('encoding'))
Ejemplo n.º 4
0
    def detect_encoding(self):
        """
        Try to guess the encoding of ``self.data``.

        The result of ``pycharlockholmes.detect`` is stored on the instance
        as ``_detect_encoding`` and reused on later calls.

        Returns: the detection result (per the original note, a mapping
                 with encoding, confidence and type entries -- confirm
                 against pycharlockholmes), or None when nothing is cached
                 and ``self.data`` is falsy.
        """
        if not hasattr(self, '_detect_encoding'):
            # Nothing cached yet: without data there is nothing to detect,
            # and nothing is stored so a later call can try again.
            if not self.data:
                return None
            self._detect_encoding = pycharlockholmes.detect(self.data)

        return self._detect_encoding
Ejemplo n.º 5
0
        def get_detection(binary_file):
            """Return the result of detect() for one file under binary_bin.

            ``binary_bin`` comes from the enclosing scope (not visible
            here; presumably a directory path -- confirm at the call site).
            """
            path = '%s/%s' % (binary_bin, binary_file)

            # Close the handle deterministically instead of leaking it.
            with open(path) as handle:
                detected = detect(handle.read())
            return detected
Ejemplo n.º 6
0
    def test_detect_accepts_encoding_hint(self):
        """detect() accepts a second hint argument.

        For the input 'test' with the hint 'UTF-8', the reported encoding
        is expected to be 'ISO-8859-1'.
        """
        result = detect('test', 'UTF-8')

        self.assertEqual('ISO-8859-1', result['encoding'])
Ejemplo n.º 7
0
    def test_detect_method(self):
        """detect() on the input 'test' should report 'ISO-8859-1'."""
        result = detect('test')

        self.assertEqual('ISO-8859-1', result['encoding'])