def test_can_detect_utf8_when_cp1252_would_fail(self):
    """Expect "utf-8" without a fallback and "cp1252" when that is the fallback."""
    source_path = _test_path("utf-8_ok_cp1252_broken")
    with open(source_path, "wb") as binary_target:
        # Write closing double quote in UTF-8, which contains 0x9d,
        # which fails when read as CP1252.
        binary_target.write(b"\xe2\x80\x9d")

    # Pairs of (fallback passed to encoding_for, encoding expected back).
    expectations = ((None, "utf-8"), ("cp1252", "cp1252"))
    for fallback, expected in expectations:
        detected = analysis.encoding_for(
            source_path, encoding="automatic", fallback_encoding=fallback)
        assert detected == expected
def test_can_detect_utf8_when_cp1252_would_fail(self):
    """Expect 'utf-8' without a fallback and 'cp1252' when that is the fallback."""
    source_path = _test_path('utf-8_ok_cp1252_broken')
    with open(source_path, 'wb') as binary_target:
        # Write closing double quote in UTF-8, which contains 0x9d,
        # which fails when read as CP1252.
        binary_target.write(b"\xe2\x80\x9d")

    # Pairs of (fallback passed to encoding_for, encoding expected back).
    for fallback, expected in ((None, 'utf-8'), ('cp1252', 'cp1252')):
        detected = analysis.encoding_for(
            source_path, encoding='automatic', fallback_encoding=fallback)
        self.assertEqual(detected, expected)
def test_can_detect_utf8_when_cp1252_would_fail(self):
    """Expect "utf-8" without a fallback and "cp1252" when that is the fallback."""
    # Write closing double quote in UTF-8, which contains 0x9d,
    # which fails when read as CP1252.
    utf8_closing_quote = b"\xe2\x80\x9d"
    source_path = self.create_temp_binary_file("utf-8_ok_cp1252_broken", utf8_closing_quote)

    # Pairs of (fallback passed to encoding_for, encoding expected back).
    expectations = ((None, "utf-8"), ("cp1252", "cp1252"))
    for fallback, expected in expectations:
        detected = analysis.encoding_for(
            source_path, encoding="automatic", fallback_encoding=fallback)
        assert detected == expected
def test_can_detect_plain_encoding(self):
    """Store the test code as cp1252 and as utf-8 and expect each to be detected."""
    candidate_encodings = ("cp1252", "utf-8")
    for expected in candidate_encodings:
        # The encoding name doubles as the name of the temporary file.
        source_path = self.create_temp_file(expected, EncodingTest._TEST_CODE, expected)
        detected = analysis.encoding_for(source_path)
        assert detected == expected
def test_can_detect_plain_encoding(self):
    """Store the test code as cp1252 and as utf-8 and expect each to be detected."""
    candidate_encodings = ('cp1252', 'utf-8')
    for expected in candidate_encodings:
        # The encoding name doubles as the name passed to _test_path().
        source_path = EncodingTest._test_path(expected)
        with open(source_path, 'w', encoding=expected) as text_target:
            text_target.write(EncodingTest._TEST_CODE)
        detected = analysis.encoding_for(source_path)
        self.assertEqual(detected, expected)
def test_can_detect_plain_encoding(self):
    """Store the test code as cp1252 and as utf-8 and expect each to be detected."""
    candidate_encodings = ("cp1252", "utf-8")
    for expected in candidate_encodings:
        # The encoding name doubles as the name passed to _test_path().
        source_path = _test_path(expected)
        with open(source_path, "w", encoding=expected) as text_target:
            text_target.write(EncodingTest._TEST_CODE)
        detected = analysis.encoding_for(source_path)
        assert detected == expected
def test_can_detect_plain_encoding(self):
    """Store the test code as cp1252 and as utf-8 and expect each to be detected."""
    candidate_encodings = ('cp1252', 'utf-8')
    for expected in candidate_encodings:
        # The encoding name doubles as the name passed to _test_path().
        source_path = _test_path(expected)
        with open(source_path, 'w', encoding=expected) as text_target:
            text_target.write(EncodingTest._TEST_CODE)
        detected = analysis.encoding_for(source_path)
        self.assertEqual(detected, expected)
def test_can_detect_xml_prolog(self):
    """Expect the encoding named in an XML prolog to be reported by encoding_for()."""
    expected = "iso-8859-15"
    prolog_template = '<?xml encoding="{0}" standalone="yes"?><some>{1}</some>'
    document = prolog_template.format(expected, EncodingTest._TEST_CODE)
    # The file is stored in the same encoding that its prolog declares.
    source_path = self.create_temp_file(expected + ".xml", document, expected)
    detected = analysis.encoding_for(source_path)
    assert detected == expected
def test_can_detect_magic_comment(self):
    """Expect the encoding named in a "coding:" magic comment to be reported."""
    expected = 'iso-8859-15'
    source_path = EncodingTest._test_path('magic-' + expected)
    with open(source_path, 'w', encoding=expected) as text_target:
        # Shebang first, magic comment on the second line, then the test code.
        text_target.writelines([
            '#!/usr/bin/python\n',
            '# -*- coding: {0} -*-\n'.format(expected),
            EncodingTest._TEST_CODE,
        ])
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, expected)
def test_can_detect_xml_prolog(self):
    """Expect the encoding named in an XML prolog to be reported by encoding_for()."""
    expected = 'iso-8859-15'
    source_path = EncodingTest._test_path('xml-' + expected)
    # The file is stored in the same encoding that its prolog declares.
    with open(source_path, 'w', encoding=expected) as text_target:
        prolog_template = '<?xml encoding="{0}" standalone="yes"?><some>{1}</some>'
        text_target.write(prolog_template.format(expected, EncodingTest._TEST_CODE))
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, expected)
def _test_can_detect_bom_encoding(self, encoding):
    """Write the test code in ``encoding`` and expect that encoding to be detected.

    For every encoding except 'utf-8-sig' the BOM looked up in
    ``EncodingTest._ENCODING_TO_BOM_MAP`` is written in front of the data.
    """
    source_path = EncodingTest._test_path(encoding)
    with open(source_path, 'wb') as binary_target:
        # Python's 'utf-8-sig' codec emits the BOM itself when encoding.
        needs_explicit_bom = encoding != 'utf-8-sig'
        if needs_explicit_bom:
            binary_target.write(EncodingTest._ENCODING_TO_BOM_MAP[encoding])
        binary_target.write(EncodingTest._TEST_CODE.encode(encoding))
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, encoding)
def test_can_use_hardcoded_ending(self):
    """Expect an explicitly passed encoding to be returned even if it does not fit the file."""
    source_path = _test_path('hardcoded_cp1252')
    with open(source_path, 'w', encoding='cp1252') as text_target:
        text_target.write('\N{EURO SIGN}')

    hardcoded_encoding = analysis.encoding_for(source_path, 'utf-8')
    self.assertEqual(hardcoded_encoding, 'utf-8')

    # Make sure that we cannot actually read the file using the hardcoded but wrong encoding.
    with open(source_path, 'r', encoding=hardcoded_encoding) as unreadable_source:
        with self.assertRaises(UnicodeDecodeError):
            unreadable_source.read()
def test_can_detect_magic_comment(self):
    """Expect the encoding named in a "coding:" magic comment to be reported."""
    expected = 'iso-8859-15'
    source_path = _test_path('magic-' + expected)
    with open(source_path, 'w', encoding=expected) as text_target:
        # Shebang first, magic comment on the second line, then the test code.
        text_target.writelines([
            '#!/usr/bin/python\n',
            '# -*- coding: {0} -*-\n'.format(expected),
            EncodingTest._TEST_CODE,
        ])
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, expected)
def test_can_detect_xml_prolog(self):
    """Expect the encoding named in an XML prolog to be reported by encoding_for()."""
    expected = 'iso-8859-15'
    source_path = _test_path('xml-' + expected)
    # The file is stored in the same encoding that its prolog declares.
    with open(source_path, 'w', encoding=expected) as text_target:
        prolog_template = '<?xml encoding="{0}" standalone="yes"?><some>{1}</some>'
        text_target.write(prolog_template.format(expected, EncodingTest._TEST_CODE))
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, expected)
def _test_can_detect_bom_encoding(self, encoding):
    """Write the test code in ``encoding`` and expect that encoding to be detected.

    For every encoding except 'utf-8-sig' the BOM looked up in
    ``EncodingTest._ENCODING_TO_BOM_MAP`` is written in front of the data.
    """
    source_path = _test_path(encoding)
    with open(source_path, 'wb') as binary_target:
        # Python's 'utf-8-sig' codec emits the BOM itself when encoding.
        needs_explicit_bom = encoding != 'utf-8-sig'
        if needs_explicit_bom:
            binary_target.write(EncodingTest._ENCODING_TO_BOM_MAP[encoding])
        binary_target.write(EncodingTest._TEST_CODE.encode(encoding))
    detected = analysis.encoding_for(source_path)
    self.assertEqual(detected, encoding)
def test_can_detect_magic_comment(self):
    """Expect the encoding named in a "coding:" magic comment to be reported."""
    expected = "iso-8859-15"
    source_path = _test_path("magic-" + expected)
    with open(source_path, "w", encoding=expected) as text_target:
        # Shebang first, magic comment on the second line, then the test code.
        text_target.writelines([
            "#!/usr/bin/python\n",
            "# -*- coding: {0} -*-\n".format(expected),
            EncodingTest._TEST_CODE,
        ])
    detected = analysis.encoding_for(source_path)
    assert detected == expected
def test_can_detect_xml_prolog(self):
    """Expect the encoding named in an XML prolog to be reported by encoding_for()."""
    expected = "iso-8859-15"
    source_path = _test_path("xml-" + expected)
    # The file is stored in the same encoding that its prolog declares.
    with open(source_path, "w", encoding=expected) as text_target:
        prolog_template = '<?xml encoding="{0}" standalone="yes"?><some>{1}</some>'
        text_target.write(prolog_template.format(expected, EncodingTest._TEST_CODE))
    detected = analysis.encoding_for(source_path)
    assert detected == expected
def _test_can_detect_bom_encoding(self, encoding):
    """Write the test code in ``encoding`` and expect that encoding to be detected.

    For every encoding except "utf-8-sig" the BOM looked up in
    ``EncodingTest._ENCODING_TO_BOM_MAP`` is written in front of the data.
    """
    source_path = _test_path(encoding)
    with open(source_path, "wb") as binary_target:
        # Python's "utf-8-sig" codec emits the BOM itself when encoding.
        needs_explicit_bom = encoding != "utf-8-sig"
        if needs_explicit_bom:
            binary_target.write(EncodingTest._ENCODING_TO_BOM_MAP[encoding])
        binary_target.write(EncodingTest._TEST_CODE.encode(encoding))
    detected = analysis.encoding_for(source_path)
    assert detected == encoding
def test_can_detect_magic_comment(self):
    """Expect the encoding named in a "coding:" magic comment to be reported."""
    expected = "iso-8859-15"
    shebang = "#!/usr/bin/python"
    magic_comment = "# -*- coding: {0} -*-".format(expected)
    # Shebang first, magic comment second, then the test code.
    source_lines = [shebang, magic_comment, EncodingTest._TEST_CODE]
    source_path = self.create_temp_file("magic-" + expected, source_lines, expected)
    detected = analysis.encoding_for(source_path)
    assert detected == expected
def test_can_use_hardcoded_ending(self):
    """Expect an explicitly passed encoding to be returned even if it does not fit the file."""
    euro_sign = "\N{EURO SIGN}"
    source_path = self.create_temp_file("hardcoded_cp1252", euro_sign, "cp1252")

    hardcoded_encoding = analysis.encoding_for(source_path, "utf-8")
    assert hardcoded_encoding == "utf-8"

    # Make sure that we cannot actually read the file using the hardcoded but wrong encoding.
    with open(source_path, "r", encoding=hardcoded_encoding) as unreadable_source, \
            pytest.raises(UnicodeDecodeError):
        unreadable_source.read()
def test_fails_on_unknown_magic_encoding_comment(self):
    """Expect an error state when the magic comment names an encoding that does not exist.

    ``encoding_for()`` is expected to return the unknown name as written in
    the magic comment; analysing the file with that name is then expected
    to yield the '__error__' language, the error state and a state info
    that mentions 'unknown encoding'.
    """
    test_path = _test_path('unknown_magic_encoding_comment', 'py')
    with open(test_path, 'w', encoding='utf-8') as test_file:
        # Terminate each line so that the magic comment and the statement
        # are separate source lines instead of one fused comment line.
        test_file.write('# -*- coding: no_such_encoding -*-\n')
        test_file.write('print("hello")\n')

    no_such_encoding = analysis.encoding_for(test_path)
    self.assertEqual(no_such_encoding, 'no_such_encoding')

    source_analysis = analysis.source_analysis(test_path, 'test', encoding=no_such_encoding)
    self.assertEqual(source_analysis.language, '__error__')
    self.assertEqual(source_analysis.state, analysis.SourceState.error.name)
    self.assertRegex(str(source_analysis.state_info), '.*unknown encoding')
def test_fails_on_unknown_magic_encoding_comment(self):
    """Expect an error state when the magic comment names an encoding that does not exist."""
    source_lines = ["# -*- coding: no_such_encoding -*-", 'print("hello")']
    source_path = self.create_temp_file("unknown_magic_encoding_comment.py", source_lines)

    # The unknown name is expected back exactly as written in the magic comment.
    detected = analysis.encoding_for(source_path)
    assert detected == "no_such_encoding"

    # Analysing the file with that encoding name must end in the error state.
    failed_analysis = analysis.SourceAnalysis.from_file(source_path, "test", encoding=detected)
    assert failed_analysis.language == "__error__"
    assert failed_analysis.state == analysis.SourceState.error
    state_info_text = str(failed_analysis.state_info)
    assert "unknown encoding" in state_info_text
def test_fails_on_unknown_magic_encoding_comment(self):
    """Expect an error state when the magic comment names an encoding that does not exist.

    ``encoding_for()`` is expected to return the unknown name as written in
    the magic comment; analysing the file with that name is then expected
    to yield the "__error__" language, the error state and a state info
    that mentions "unknown encoding".
    """
    test_path = _test_path("unknown_magic_encoding_comment", "py")
    with open(test_path, "w", encoding="utf-8") as test_file:
        # Terminate each line so that the magic comment and the statement
        # are separate source lines instead of one fused comment line.
        test_file.write("# -*- coding: no_such_encoding -*-\n")
        test_file.write('print("hello")\n')

    no_such_encoding = analysis.encoding_for(test_path)
    assert no_such_encoding == "no_such_encoding"

    source_analysis = analysis.source_analysis(test_path, "test", encoding=no_such_encoding)
    assert source_analysis.language == "__error__"
    assert source_analysis.state == analysis.SourceState.error.name
    assert "unknown encoding" in str(source_analysis.state_info)
def test_can_detect_automatic_encoding_for_empty_source(self):
    """Expect 'utf-8' to be reported for a file that contains no bytes."""
    empty_path = _test_path('empty')
    # Opening for binary writing and closing right away leaves an empty file.
    open(empty_path, 'wb').close()
    detected = analysis.encoding_for(empty_path)
    self.assertEqual(detected, 'utf-8')
def test_can_detect_automatic_encoding_for_empty_source(self):
    """Expect "utf-8" to be reported for a file that contains no bytes."""
    no_content = b""
    empty_path = self.create_temp_binary_file("empty", no_content)
    detected = analysis.encoding_for(empty_path)
    assert detected == "utf-8"
def test_can_detect_chardet_encoding(self):
    """Expect this test module's own source file to be reported as 'utf-8'."""
    detected = analysis.encoding_for(__file__)
    self.assertEqual(detected, 'utf-8')
def test_can_detect_automatic_encoding_for_empty_source(self):
    """Expect "utf-8" to be reported for a file that contains no bytes."""
    empty_path = _test_path("empty")
    # Opening for binary writing and closing right away leaves an empty file.
    open(empty_path, "wb").close()
    detected = analysis.encoding_for(empty_path)
    assert detected == "utf-8"
def test_can_detect_chardet_encoding(self):
    """Expect this test module's own source file to be reported as "utf-8"."""
    detected = analysis.encoding_for(__file__)
    assert detected == "utf-8"
def test_can_detect_automatic_encoding_for_empty_source(self):
    """Expect 'utf-8' to be reported for a file that contains no bytes."""
    empty_path = EncodingTest._test_path('empty')
    # Opening for binary writing and closing right away leaves an empty file.
    open(empty_path, 'wb').close()
    detected = analysis.encoding_for(empty_path)
    self.assertEqual(detected, 'utf-8')