def handle_starttag(self, tag, attrib): fixed_attrib = [] for name, value in attrib: # Fixup minimized attributes if value is None: value = unicode(name) elif not isinstance(value, unicode): value = value.decode(self.encoding, 'replace') fixed_attrib.append((QName(name), stripentities(value))) self._enqueue(START, (QName(tag), Attrs(fixed_attrib))) if tag in self._EMPTY_ELEMS: self._enqueue(END, QName(tag)) else: self._open_tags.append(tag)
def handle_endtag(self, tag): if tag not in self._EMPTY_ELEMS: while self._open_tags: open_tag = self._open_tags.pop() self._enqueue(END, QName(open_tag)) if open_tag.lower() == tag.lower(): break
def __call__(self, kind, data, pos, namespaces, variables): qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name)) if kind is START: if self.principal_type is ATTRIBUTE and qname in data[1]: return data[1].get(qname) else: return data[0] == qname
def ET(element): """Convert a given ElementTree element to a markup stream. :param element: an ElementTree element :return: a markup stream """ tag_name = QName(element.tag.lstrip('{')) attrs = Attrs([(QName(attr.lstrip('{')), value) for attr, value in element.items()]) yield START, (tag_name, attrs), (None, -1, -1) if element.text: yield TEXT, element.text, (None, -1, -1) for child in element.getchildren(): for item in ET(child): yield item yield END, tag_name, (None, -1, -1) if element.tail: yield TEXT, element.tail, (None, -1, -1)
def _generate(): kind, (tag, attrib), pos = stream.next() attrs = self.expr.evaluate(ctxt) if attrs: if isinstance(attrs, Stream): try: attrs = iter(attrs).next() except StopIteration: attrs = [] elif not isinstance(attrs, list): # assume it's a dict attrs = attrs.items() attrib -= [name for name, val in attrs if val is None] attrib |= [(QName(name), unicode(val).strip()) for name, val in attrs if val is not None] yield kind, (tag, attrib), pos for event in stream: yield event
def _generate(): try: bufsize = 4 * 1024 # 4K done = False while 1: while not done and len(self._queue) == 0: data = self.source.read(bufsize) if data == '': # end of data self.close() done = True else: self.feed(data) for kind, data, pos in self._queue: yield kind, data, pos self._queue = [] if done: open_tags = self._open_tags open_tags.reverse() for tag in open_tags: yield END, QName(tag), pos break except html.HTMLParseError, e: msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) raise ParseError(msg, self.filename, e.lineno, e.offset)
def test_multiple_bound_namespaces(self): stream = Stream([ (Stream.START, (QName('div'), Attrs()), (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)), (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)), (Stream.END, QName('http://example.org/}p'), (None, -1, -1)), (Stream.END_NS, 'x', (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)), (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)), (Stream.END, QName('http://example.org/}p'), (None, -1, -1)), (Stream.END_NS, 'x', (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.END, QName('div'), (None, -1, -1)), ]) output = stream.render(XMLSerializer) self.assertEqual("""<div> <x:p xmlns:x="http://example.org/"/> <x:p xmlns:x="http://example.org/"/> </div>""", output)
def test_nested_default_namespaces(self): stream = Stream([ (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)), (Stream.START, (QName('http://example.org/}div'), Attrs()), (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)), (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)), (Stream.END, QName('http://example.org/}p'), (None, -1, -1)), (Stream.END_NS, '', (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)), (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)), (Stream.END, QName('http://example.org/}p'), (None, -1, -1)), (Stream.END_NS, '', (None, -1, -1)), (Stream.TEXT, '\n ', (None, -1, -1)), (Stream.END, QName('http://example.org/}div'), (None, -1, -1)), (Stream.END_NS, '', (None, -1, -1)) ]) output = stream.render(XMLSerializer) self.assertEqual("""<div xmlns="http://example.org/"> <p/> <p/> </div>""", output)
class HTMLSerializer(XHTMLSerializer): """Produces HTML text from an event stream. >>> from libs.genshi.builder import tag >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) >>> print ''.join(HTMLSerializer()(elem.generate())) <div><a href="foo"></a><br><hr noshade></div> """ _NOESCAPE_ELEMS = frozenset([ QName('script'), QName('http://www.w3.org/1999/xhtml}script'), QName('style'), QName('http://www.w3.org/1999/xhtml}style') ]) def __init__(self, doctype=None, strip_whitespace=True): """Initialize the HTML serializer. :param doctype: a ``(name, pubid, sysid)`` tuple that represents the DOCTYPE declaration that should be included at the top of the generated output :param strip_whitespace: whether extraneous whitespace should be stripped from the output """ super(HTMLSerializer, self).__init__(doctype, False) self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append( WhitespaceFilter(self._PRESERVE_SPACE, self._NOESCAPE_ELEMS)) self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml')) def __call__(self, stream): boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS noescape_elems = self._NOESCAPE_ELEMS have_doctype = False noescape = False stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: if kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] for attr, value in attrib: if attr in boolean_attrs: if value: buf += [' ', attr] else: buf += [' ', attr, '="', escape(value), '"'] buf.append('>') if kind is EMPTY: if tag not in empty_elems: buf.append('</%s>' % tag) yield Markup(u''.join(buf)) if tag in noescape_elems: noescape = True elif kind is END: yield Markup('</%s>' % data) noescape = False elif kind is TEXT: if noescape: yield data else: yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = ['<!DOCTYPE %s'] if pubid: buf.append(' PUBLIC "%s"') elif sysid: buf.append(' SYSTEM') if sysid: buf.append(' "%s"') buf.append('>\n') yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is PI: yield Markup('<?%s %s?>' % data)
class XHTMLSerializer(XMLSerializer): """Produces XHTML text from an event stream. >>> from libs.genshi.builder import tag >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True)) >>> print ''.join(XHTMLSerializer()(elem.generate())) <div><a href="foo"></a><br /><hr noshade="noshade" /></div> """ _EMPTY_ELEMS = frozenset([ 'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param' ]) _BOOLEAN_ATTRS = frozenset([ 'selected', 'checked', 'compact', 'declare', 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap' ]) _PRESERVE_SPACE = frozenset([ QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'), QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea') ]) def __init__(self, doctype=None, strip_whitespace=True, namespace_prefixes=None): super(XHTMLSerializer, self).__init__(doctype, False) self.filters = [EmptyTagFilter()] if strip_whitespace: self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) namespace_prefixes = namespace_prefixes or {} namespace_prefixes['http://www.w3.org/1999/xhtml'] = '' self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) def __call__(self, stream): boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS have_doctype = False in_cdata = False stream = chain(self.preamble, stream) for filter_ in self.filters: stream = filter_(stream) for kind, data, pos in stream: if kind is START or kind is EMPTY: tag, attrib = data buf = ['<', tag] for attr, value in attrib: if attr in boolean_attrs: value = attr buf += [' ', attr, '="', escape(value), '"'] if kind is EMPTY: if tag in empty_elems: buf.append(' />') else: buf.append('></%s>' % tag) else: buf.append('>') yield Markup(u''.join(buf)) elif kind is END: yield Markup('</%s>' % data) elif kind is TEXT: if in_cdata: yield data else: yield escape(data, quotes=False) elif kind is COMMENT: yield Markup('<!--%s-->' % data) elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = ['<!DOCTYPE %s'] if pubid: buf.append(' PUBLIC "%s"') elif sysid: buf.append(' SYSTEM') if sysid: buf.append(' "%s"') buf.append('>\n') yield Markup(u''.join(buf), *filter(None, data)) have_doctype = True elif kind is START_CDATA: yield Markup('<![CDATA[') in_cdata = True elif kind is END_CDATA: yield Markup(']]>') in_cdata = False elif kind is PI: yield Markup('<?%s %s?>' % data)
def _handle_end(self, tag): self._enqueue(END, QName(tag))
def _handle_start(self, tag, attrib): attrs = Attrs([(QName(name), value) for name, value in zip(*[iter(attrib)] * 2)]) self._enqueue(START, (QName(tag), attrs))