def test_bom():
    """Check that input starting with a UTF-8 BOM is reported as utf-8.

    The first character read back must be the apostrophe that follows the
    BOM, i.e. the BOM itself is not expected to come back from ``char()``.
    """
    payload = codecs.BOM_UTF8 + b"'"
    reader = HTMLInputStream(payload)
    detected = reader.charEncoding[0]
    assert detected.name == 'utf-8'
    assert reader.char() == "'"
def test_char_utf8():
    """Check that a UTF-8 encoded character round-trips with a utf-8 override.

    The stream is built from the UTF-8 bytes of U+2018 with
    ``override_encoding='utf-8'``; it must report utf-8 and return the
    original character from ``char()``.
    """
    sample = '\u2018'
    reader = HTMLInputStream(sample.encode('utf-8'), override_encoding='utf-8')
    detected = reader.charEncoding[0]
    assert detected.name == 'utf-8'
    assert reader.char() == sample
def test_char_win1252():
    """Check that windows-1252 bytes decode character by character.

    No override encoding is passed; the test expects the stream to report
    windows-1252 and to return each original character, in order, from
    successive ``char()`` calls.
    """
    text = "\xa9\xf1\u2019"
    reader = HTMLInputStream(text.encode('windows-1252'))
    assert reader.charEncoding[0].name == 'windows-1252'
    # One char() call per expected character, in source order.
    for expected in text:
        assert reader.char() == expected
def test_char_ascii():
    """Check how an 'ascii' override encoding is reported by the stream.

    The expected encoding name is 'windows-1252', not 'ascii'.
    """
    reader = HTMLInputStream(b"'", override_encoding='ascii')
    # NOTE(review): presumably 'ascii' is treated as a label for
    # windows-1252 (as in the WHATWG Encoding Standard) -- confirm against
    # the HTMLInputStream implementation.
    reported = reader.charEncoding[0].name
    assert reported == 'windows-1252'
    assert reader.char() == "'"