def test_entityrefs(self):
    # Feeds a meta tag (without http-equiv), a 'Setting' table header row
    # and a row with one non-ASCII cell, then checks the first entry of
    # populator.tables['Setting'].
    # NOTE(review): the test name mentions entity references, yet the fed
    # cell holds a literal non-ASCII character rather than an entity such
    # as '&auml;' -- confirm this is the intended input.
    html_reader = HtmlReader()
    html_reader.populator = PopulatorMock()
    chunks = (
        '<meta content="text/html; charset=utf-8" />',
        '<table><tr><td>Setting</td></tr>',
        '<tr><td>äiti</tr>',
    )
    for chunk in chunks:
        html_reader.feed(chunk)
    first_row = html_reader.populator.tables['Setting'][0]
    assert_equals(first_row, [u'\xe4iti'])
def test_valid_http_equiv_is_required(self):
    # A meta tag with no http-equiv attribute, or with http-equiv="Invalid",
    # is expected to leave the reader's _encoding at 'ISO-8859-1'.
    html_reader = HtmlReader()
    meta_tags = (
        '<meta content="text/html; charset=utf-8" />',
        '<meta http-equiv="Invalid" content="text/html; charset=utf-8" />',
    )
    for tag in meta_tags:
        html_reader.feed(tag)
        assert_equals(html_reader._encoding, 'ISO-8859-1')
def test_encoding_is_read_from_meta_tag(self):
    # Each Content-Type meta tag fed to the same reader should set
    # _encoding to the charset exactly as spelled in the tag.
    html_reader = HtmlReader()
    cases = (
        (
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            'utf-8',
        ),
        (
            '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">',
            'UTF-8',
        ),
    )
    for markup, expected_encoding in cases:
        html_reader.feed(markup)
        assert_equals(html_reader._encoding, expected_encoding)
def setUp(self):
    """Create a fresh HtmlReader with a PopulatorMock as its populator."""
    html_reader = HtmlReader()
    html_reader.populator = PopulatorMock()
    self.reader = html_reader
def test_encoding_is_set_from_xml_preamble(self):
    # The encoding named in an XML declaration should become _encoding,
    # whether it comes after or before the version attribute and whether
    # or not its value is quoted.
    html_reader = HtmlReader()
    cases = (
        ('<?xml version="1.0" encoding="UTF-8"?>', 'UTF-8'),
        ('<?xml encoding=US-ASCII version="1.0"?>', 'US-ASCII'),
    )
    for preamble, expected_encoding in cases:
        html_reader.feed(preamble)
        assert_equals(html_reader._encoding, expected_encoding)
def test_default_encoding(self):
    # A newly constructed reader, with nothing fed to it, should report
    # 'ISO-8859-1' as its _encoding.
    fresh_reader = HtmlReader()
    assert_equals(fresh_reader._encoding, 'ISO-8859-1')
def setUp(self):
    """Create a fresh HtmlReader whose handle_data is this test's
    _handle_response method."""
    html_reader = HtmlReader()
    html_reader.handle_data = self._handle_response
    self.reader = html_reader