def test_strip_tags_flag(self):
    """Toggle the strip-tags flag and check detection under each setting.

    For both True and False the flag must read back as it was set, and
    the markup sample must be detected as 'UTF-8'.
    """
    for flag in (True, False):
        markup = '<div ascii_attribute="some more ascii">λ, λ, λ</div>'
        set_strip_tags(flag)
        outcome = detect(markup)
        # The getter has to reflect the value that was just set.
        self.assertEqual(get_strip_tags(), flag)
        self.assertEqual('UTF-8', outcome['encoding'])
def test_detection_works_as_expected(self):
    """Run ``detect`` over every entry of ``self.TEST_FILES``.

    Each value in ``self.TEST_FILES`` is indexed as a sequence: item 0 is
    the path of the file to read and item 1 is the detection result that
    ``detect`` is expected to return for that file's content.
    """
    for test in self.TEST_FILES:
        entry = self.TEST_FILES[test]
        file_path = entry[0]
        file_result = entry[1]
        # Use a context manager so the handle is closed even when a later
        # assertion fails; the open mode is left exactly as it was.
        with open(file_path) as test_file:
            content = test_file.read()
        test_result = detect(content)
        self.assertEqual(test_result, file_result)
def test_detection_works_as_expected_2(self):
    """Check the detected type and encoding for every fixture file.

    Each item of ``self.FIXTURE_FILES`` unpacks into a file name (looked
    up under ``fixtures/``), the expected encoding and the expected type.
    The detection result may lack an 'encoding' entry, in which case the
    expected encoding has to be None.
    """
    for filename, encoding, type_ in self.FIXTURE_FILES:
        file_path = 'fixtures/%s' % filename
        # Close the fixture promptly instead of leaking the file handle.
        with open(file_path) as fixture_file:
            content = fixture_file.read()
        test_result = detect(content)
        self.assertEqual(type_, test_result['type'])
        # assertEqual passes/fails on the same == comparison as before but
        # reports both values when they differ.
        self.assertEqual(encoding, test_result.get('encoding'))
def detect_encoding(self):
    """
    Try to guess the encoding of ``self.data``.

    The value returned by ``pycharlockholmes.detect`` is stored on the
    instance as ``_detect_encoding`` and handed back on later calls
    without running detection again.

    Returns: the detection result (described by the original notes as a
             mapping with encoding, confidence and type entries), or
             None when ``self.data`` is empty.
    """
    if hasattr(self, '_detect_encoding'):
        return self._detect_encoding

    if not self.data:
        # Nothing to analyse. The miss is deliberately not cached, so a
        # later call looks at self.data again.
        return None

    self._detect_encoding = pycharlockholmes.detect(self.data)
    return self._detect_encoding
def get_detection(binary_file):
    """Return the result of ``detect`` for a file under ``binary_bin``.

    ``binary_file`` is joined onto ``binary_bin`` with a '/' to form the
    path that is read and passed to ``detect``.
    """
    path = '%s/%s' % (binary_bin, binary_file)
    # Read inside a context manager so the file handle is always closed.
    with open(path) as binary_handle:
        content = binary_handle.read()
    return detect(content)
def test_detect_accepts_encoding_hint(self):
    """Call ``detect`` with an encoding hint as its second argument.

    The sample 'test' is expected to be reported as 'ISO-8859-1' even
    though 'UTF-8' is supplied as the hint.
    """
    outcome = detect('test', 'UTF-8')
    reported = outcome['encoding']
    self.assertEqual('ISO-8859-1', reported)
def test_detect_method(self):
    """Call ``detect`` with content only and check the reported encoding.

    The sample 'test' is expected to be reported as 'ISO-8859-1'.
    """
    outcome = detect('test')
    reported = outcome['encoding']
    self.assertEqual('ISO-8859-1', reported)