def test_text_node_pos_single_line(self):
    """Check the position reported for text inside a one-line element."""
    markup = '<elem>foo bar</elem>'
    parsed = list(XMLParser(StringIO(markup)))
    # The second event is the text node between the start and end tags.
    event_kind, payload, position = parsed[1]
    self.assertEqual(Stream.TEXT, event_kind)
    self.assertEqual('foo bar', payload)
    self.assertEqual((None, 1, 6), position)
def test_hex_charref(self):
    """A hexadecimal character reference is decoded to its character."""
    # The markup has to carry the reference itself (``&#x27;``); a bare
    # apostrophe here would terminate the string literal prematurely and
    # would not exercise character-reference handling at all.
    text = '<span>&#x27;</span>'
    events = list(HTMLParser(StringIO(text)))
    self.assertEqual(3, len(events))
    self.assertEqual((Stream.START, ('span', ())), events[0][:2])
    self.assertEqual((Stream.TEXT, "'"), events[1][:2])
    self.assertEqual((Stream.END, 'span'), events[2][:2])
def test_processing_instruction_trailing_qmark(self):
    """A PI ending in ``??>`` keeps one question mark in its data."""
    markup = '<?php echo "Foobar" ??>'
    parsed = list(HTMLParser(StringIO(markup)))
    event_kind, (pi_target, pi_data), _ = parsed[0]
    self.assertEqual(Stream.PI, event_kind)
    self.assertEqual('php', pi_target)
    self.assertEqual('echo "Foobar" ?', pi_data)
def HTML(text, encoding=None):
    """Parse the given HTML source and return a markup stream.

    Unlike with `HTMLParser`, the returned stream is reusable, meaning it
    can be iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source
    :param encoding: the encoding handed to the parser when `text` is not
                     a `str`; not forwarded for `str` input
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error
                        recovery fails
    """
    if isinstance(text, str):
        # Unicode text carries no encoding, so the parser is always given
        # ``None`` here. Accepting (and discarding) an encoding argument
        # makes it easier to write doctests that work on both Python 2.x
        # and 3.x.
        source, parser_encoding = StringIO(text), None
    else:
        source, parser_encoding = BytesIO(text), encoding
    parser = HTMLParser(source, encoding=parser_encoding)
    return Stream(list(parser))
def test_undefined_entity_with_dtd(self):
    """An unknown entity raises ParseError even with a DOCTYPE present."""
    markup = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> '
        '<html>&junk;</html> '
    )
    parser = XMLParser(StringIO(markup))
    # The error is expected while the events are being consumed.
    with self.assertRaises(ParseError):
        list(parser)
def test_processing_instruction_no_data_1(self):
    """A PI with only a target yields an empty data string."""
    markup = u'<?foo ?>'
    parsed = list(HTMLParser(StringIO(markup)))
    event_kind, (pi_target, pi_data), _ = parsed[0]
    self.assertEqual(Stream.PI, event_kind)
    self.assertEqual('foo', pi_target)
    self.assertEqual('', pi_data)
def test_xmldecl_standalone(self):
    """An XML declaration with ``standalone="yes"`` reports standalone 1."""
    markup = '<?xml version="1.0" standalone="yes" ?><root />'
    parsed = list(XMLParser(StringIO(markup)))
    event_kind, declaration, _ = parsed[0]
    decl_version, decl_encoding, decl_standalone = declaration
    self.assertEqual(Stream.XML_DECL, event_kind)
    self.assertEqual('1.0', decl_version)
    self.assertEqual(None, decl_encoding)
    self.assertEqual(1, decl_standalone)
def test_xmldecl_encoding(self):
    """An XML declaration with an encoding reports it; standalone is -1."""
    markup = '<?xml version="1.0" encoding="utf-8" ?><root />'
    parsed = list(XMLParser(StringIO(markup)))
    event_kind, declaration, _ = parsed[0]
    decl_version, decl_encoding, decl_standalone = declaration
    self.assertEqual(Stream.XML_DECL, event_kind)
    self.assertEqual('1.0', decl_version)
    self.assertEqual('utf-8', decl_encoding)
    self.assertEqual(-1, decl_standalone)
def test_html_entity_in_attribute(self):
    """An HTML entity inside an attribute value is resolved by XMLParser."""
    # The attribute must contain the entity reference itself (``&nbsp;``);
    # a literal character in its place would bypass entity resolution,
    # which is the behaviour this test is named after.
    text = '<p title="&nbsp;"/>'
    events = list(XMLParser(StringIO(text)))
    kind, data, pos = events[0]
    self.assertEqual(Stream.START, kind)
    # ``data`` is the (tag, attributes) pair of the START event.
    self.assertEqual('\xa0', data[1].get('title'))
    kind, data, pos = events[1]
    self.assertEqual(Stream.END, kind)
def test_out_of_order_tags1(self):
    """Mis-nested end tags come out as properly nested END events."""
    markup = '<span><b>Foobar</span></b>'
    parsed = list(HTMLParser(StringIO(markup)))
    expected = [
        (Stream.START, ('span', ())),
        (Stream.START, ('b', ())),
        (Stream.TEXT, 'Foobar'),
        (Stream.END, 'b'),
        (Stream.END, 'span'),
    ]
    self.assertEqual(5, len(parsed))
    # Only kind and data are compared; the position is ignored.
    for wanted, event in zip(expected, parsed):
        self.assertEqual(wanted, event[:2])
def test_html_entity_with_dtd(self):
    """``&nbsp;`` resolves to U+00A0 when an XHTML DOCTYPE is declared."""
    # The document body must contain the entity reference itself; a literal
    # character in its place would not exercise entity resolution.
    text = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> '
        '<html>&nbsp;</html> '
    )
    events = list(XMLParser(StringIO(text)))
    # The text node is the third event in the stream.
    kind, data, pos = events[2]
    self.assertEqual(Stream.TEXT, kind)
    self.assertEqual('\xa0', data)
def test_processing_instruction_no_data_2(self):
    """PIs closed with a bare ``>`` yield a target and empty data."""
    markup = u'<?experiment>...<?/experiment>'
    parsed = list(HTMLParser(StringIO(markup)))

    # Opening instruction.
    event_kind, (pi_target, pi_data), _ = parsed[0]
    self.assertEqual(Stream.PI, event_kind)
    self.assertEqual('experiment', pi_target)
    self.assertEqual('', pi_data)

    # Closing instruction, found after the intervening text event.
    event_kind, (pi_target, pi_data), _ = parsed[2]
    self.assertEqual('/experiment', pi_target)
    self.assertEqual('', pi_data)
def test_element_attribute_order(self):
    """Attributes are reported in the order they appear in the source."""
    markup = '<elem title="baz" id="foo" class="bar" />'
    parsed = list(XMLParser(StringIO(markup)))
    event_kind, payload, _ = parsed[0]
    self.assertEqual(Stream.START, event_kind)
    tag_name, attributes = payload
    self.assertEqual('elem', tag_name)
    in_source_order = [('title', 'baz'), ('id', 'foo'), ('class', 'bar')]
    for index, pair in enumerate(in_source_order):
        self.assertEqual(pair, attributes[index])
def __init__(self, source, filepath=None, filename=None, loader=None,
             encoding=None, lookup='strict', allow_exec=True):
    """Initialize a template from either a string, a file-like object, or
    an already parsed markup stream.

    :param source: a string, file-like object, or markup stream to read the
                   template from
    :param filepath: the absolute path to the template file
    :param filename: the path to the template file relative to the search
                     path
    :param loader: the `TemplateLoader` to use for loading included
                   templates
    :param encoding: the encoding of the `source`
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :param allow_exec: whether Python code blocks in templates should be
                       allowed
    :raises TemplateSyntaxError: if parsing the source raises a
                                 `ParseError`

    :note: Changed in 0.5: Added the `allow_exec` argument
    """
    # Fall back to the relative name when no absolute path was supplied.
    self.filepath = filepath or filename
    self.filename = filename
    self.loader = loader
    self.lookup = lookup
    self.allow_exec = allow_exec
    self._init_filters()
    self._init_loader()
    self._prepared = False

    # Raw text or bytes are wrapped in an in-memory file object; streams
    # and objects that already have a ``read`` attribute pass through.
    already_readable = isinstance(source, Stream) or hasattr(source, 'read')
    if not already_readable:
        buffer_type = StringIO if isinstance(source, str) else BytesIO
        source = buffer_type(source)

    try:
        self._stream = self._parse(source, encoding)
    except ParseError as err:
        # Re-raise as a template error that carries this template's path.
        raise TemplateSyntaxError(err.msg, self.filepath, err.lineno,
                                  err.offset)
def XML(text):
    """Parse the given XML source and return a markup stream.

    Unlike with `XMLParser`, the returned stream is reusable, meaning it
    can be iterated over multiple times:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print(xml)
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print(xml.select('elem'))
    <elem>Foo</elem><elem>Bar</elem>
    >>> print(xml.select('elem/text()'))
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    parser = XMLParser(StringIO(text))
    # Collecting the events into a list is what makes the stream reusable.
    events = list(parser)
    return Stream(events)
def HTML(text, encoding=None):
    """Parse the given HTML source and return a markup stream.

    Unlike with `HTMLParser`, the returned stream is reusable, meaning it
    can be iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source, either as `str` or as bytes
    :param encoding: the encoding of `text` when it is given as bytes;
                     not forwarded to the parser for `str` input
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error
                        recovery fails
    """
    if isinstance(text, str):
        # Text that is already unicode has no encoding, so the parser is
        # given ``None`` regardless of what the caller passed. The argument
        # is still accepted so that doctests can pass one unconditionally.
        return Stream(list(HTMLParser(StringIO(text), encoding=None)))
    return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
def test_render_output_stream_unicode(self):
    """Rendering into ``out`` with no encoding writes text, returns None."""
    stream = XML('<li>Über uns</li>')
    sink = StringIO()
    returned = stream.render(encoding=None, out=sink)
    self.assertEqual(None, returned)
    self.assertEqual('<li>Über uns</li>', sink.getvalue())
def test_parse_fileobj(self):
    """A template can be built from a file-like object."""
    template_source = StringIO('<root> ${var} $var</root>')
    template = MarkupTemplate(template_source)
    rendered = str(template.generate(var=42))
    self.assertEqual('<root> 42 42</root>', rendered)
def test_undefined_entity_without_dtd(self):
    """An unknown entity raises ParseError when no DOCTYPE is present."""
    markup = '<html>&junk;</html>'
    parser = XMLParser(StringIO(markup))
    # The error is expected while the events are being consumed.
    with self.assertRaises(ParseError):
        list(parser)
def test_html_entity_in_text(self):
    """An HTML entity in element text is resolved by HTMLParser."""
    # The markup must contain the entity reference itself (``&nbsp;``); a
    # literal character in its place would not exercise entity handling.
    text = '<p>&nbsp;</p>'
    events = list(HTMLParser(StringIO(text)))
    # The text node sits between the START and END events.
    kind, data, pos = events[1]
    self.assertEqual(Stream.TEXT, kind)
    self.assertEqual('\xa0', data)
def test_html_entity_without_dtd(self):
    """``&nbsp;`` resolves to U+00A0 even when no DOCTYPE is declared."""
    # The markup must contain the entity reference itself (``&nbsp;``); a
    # literal character in its place would not exercise entity resolution.
    text = '<html>&nbsp;</html>'
    events = list(XMLParser(StringIO(text)))
    # The text node sits between the START and END events.
    kind, data, pos = events[1]
    self.assertEqual(Stream.TEXT, kind)
    self.assertEqual('\xa0', data)
def test_unicode_input(self):
    """Non-ASCII text passed as ``str`` comes back unchanged."""
    markup = '<div>\u2013</div>'
    parsed = list(XMLParser(StringIO(markup)))
    event_kind, payload, _ = parsed[1]
    self.assertEqual(Stream.TEXT, event_kind)
    self.assertEqual('\u2013', payload)