def implementation(self, fragment_context, html, expected, errors, test_name):
    """Check encoding detection for a single test case.

    ``html`` is the test input as text and ``expected`` is the encoding
    name the detection is supposed to produce (compared lower-cased).
    ``fragment_context``, ``errors`` and ``test_name`` are accepted but not
    used by this method.

    Raises ``unittest.SkipTest`` for the one input that is known to be
    marked as a buggy html5lib test.
    """
    bom_marker = '<!-- Starts with UTF-8 BOM -->'
    buggy_marker = '''document.write('<meta charset="ISO-8859-' + '2">')'''

    if bom_marker in html:
        # Drop the first three characters of the text and put the raw
        # UTF-8 BOM bytes in their place; the remainder must be pure ASCII.
        data = b'\xef\xbb\xbf' + html[3:].encode('ascii')
        self.assertIs(check_bom(data), codecs.BOM_UTF8)
        return

    if buggy_marker in html:
        raise unittest.SkipTest('buggy html5lib test')

    data = html.encode('utf-8')
    # Detection order: BOM first, then <meta> charset, then the
    # windows-1252 fallback.
    detected = check_bom(data)
    if not detected:
        detected = check_for_meta_charset(data) or 'windows-1252'

    to_text = type('')
    report = '\n'.join([
        to_text(part)
        for part in ('\n\nInput:', html, '\nExpected:', expected, '\nReceived:', detected)
    ])
    self.ae(expected.lower(), detected, report + '\n')
def test_check_bom(self):
    """Each entry of BOMS, followed by filler bytes, must be returned by check_bom.

    The check uses identity (``assertIs``), so check_bom has to hand back
    the very same object that is stored in BOMS.
    """
    for marker in BOMS:
        sample = marker + b'xxx'
        detected = check_bom(sample)
        self.assertIs(marker, detected)