Exemple #1
0
    def handle_starttag(self, tag, attrib):
        """Enqueue a START event for ``tag`` with normalized attributes.

        Attribute values are turned into unicode strings with entities
        stripped. Elements listed in ``_EMPTY_ELEMS`` are closed right away,
        all others are recorded as open.

        :param tag: the name of the element being opened
        :param attrib: iterable of ``(name, value)`` pairs; ``value`` is
                       ``None`` for minimized attributes
        """
        def _as_unicode(name, value):
            if value is None:
                # Minimized attribute: the name doubles as the value
                return unicode(name)
            if isinstance(value, unicode):
                return value
            return value.decode(self.encoding, 'replace')

        pairs = [(QName(name), stripentities(_as_unicode(name, value)))
                 for name, value in attrib]
        self._enqueue(START, (QName(tag), Attrs(pairs)))

        if tag not in self._EMPTY_ELEMS:
            self._open_tags.append(tag)
        else:
            # Empty elements never get an explicit end tag, so close them now
            self._enqueue(END, QName(tag))
Exemple #2
0
 def handle_endtag(self, tag):
     """Enqueue END events for open tags up to and including ``tag``.

     Tags are popped from the open-tag stack, emitting an END event for
     each, until one matching ``tag`` (case-insensitively) has been closed
     or the stack is empty. End tags of elements in ``_EMPTY_ELEMS`` are
     ignored.

     :param tag: the name of the element being closed
     """
     if tag in self._EMPTY_ELEMS:
         return
     while self._open_tags:
         current = self._open_tags.pop()
         self._enqueue(END, QName(current))
         if current.lower() == tag.lower():
             return
Exemple #3
0
 def __call__(self, kind, data, pos, namespaces, variables):
     """Test a stream event against this qualified name.

     The qualified name is built from the namespace bound to
     ``self.prefix`` in ``namespaces`` and ``self.name``.

     :return: for START events, the attribute value when the principal
              type is ATTRIBUTE and the attribute is present, otherwise
              whether the tag equals the qualified name; ``None`` for any
              other kind of event
     """
     qname = QName('%s}%s' % (namespaces.get(self.prefix), self.name))
     if kind is not START:
         return None
     if self.principal_type is ATTRIBUTE:
         attrs = data[1]
         if qname in attrs:
             return attrs.get(qname)
     return data[0] == qname
Exemple #4
0
def ET(element):
    """Convert a given ElementTree element to a markup stream.

    The element is walked recursively: a START event, the element's text,
    the events of every child, an END event and finally the element's tail
    text. All events carry the placeholder position ``(None, -1, -1)``.

    :param element: an ElementTree element
    :return: a markup stream
    """
    # ElementTree spells qualified names as "{uri}local"; dropping the
    # leading brace leaves the "uri}local" form that QName is given
    # throughout this code.
    tag_name = QName(element.tag.lstrip('{'))
    attrs = Attrs([(QName(attr.lstrip('{')), value)
                   for attr, value in element.items()])

    yield START, (tag_name, attrs), (None, -1, -1)
    if element.text:
        yield TEXT, element.text, (None, -1, -1)
    # Iterate the element directly instead of calling getchildren(), which
    # is deprecated and no longer exists in newer ElementTree versions.
    for child in element:
        for item in ET(child):
            yield item
    yield END, tag_name, (None, -1, -1)
    if element.tail:
        yield TEXT, element.tail, (None, -1, -1)
Exemple #5
0
 def _generate():
     """Yield the stream with the first event's attributes updated.

     The directive expression is evaluated and its result applied to the
     attributes of the first event: names whose value is ``None`` are
     removed, all others are set to their stripped unicode value. The
     remaining events are passed through unchanged.
     """
     kind, (tag, attrib), pos = stream.next()
     updates = self.expr.evaluate(ctxt)
     if updates:
         if isinstance(updates, Stream):
             try:
                 updates = iter(updates).next()
             except StopIteration:
                 updates = []
         elif not isinstance(updates, list):  # assume it's a dict
             updates = updates.items()
         dropped = [name for name, val in updates if val is None]
         attrib -= dropped
         replaced = [(QName(name), unicode(val).strip())
                     for name, val in updates if val is not None]
         attrib |= replaced
     yield kind, (tag, attrib), pos
     for event in stream:
         yield event
Exemple #6
0
 def _generate():
     try:
         bufsize = 4 * 1024  # 4K
         done = False
         while 1:
             while not done and len(self._queue) == 0:
                 data = self.source.read(bufsize)
                 if data == '':  # end of data
                     self.close()
                     done = True
                 else:
                     self.feed(data)
             for kind, data, pos in self._queue:
                 yield kind, data, pos
             self._queue = []
             if done:
                 open_tags = self._open_tags
                 open_tags.reverse()
                 for tag in open_tags:
                     yield END, QName(tag), pos
                 break
     except html.HTMLParseError, e:
         msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
         raise ParseError(msg, self.filename, e.lineno, e.offset)
Exemple #7
0
 def test_multiple_bound_namespaces(self):
     """Sibling elements that each bind the ``x`` prefix both declare it."""
     stream = Stream([
         (Stream.START, (QName('div'), Attrs()), (None, -1, -1)),
         (Stream.TEXT, '\n          ', (None, -1, -1)),
         (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
         (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
         (Stream.END_NS, 'x', (None, -1, -1)),
         (Stream.TEXT, '\n          ', (None, -1, -1)),
         (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
         (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
         (Stream.END_NS, 'x', (None, -1, -1)),
         (Stream.TEXT, '\n        ', (None, -1, -1)),
         (Stream.END, QName('div'), (None, -1, -1)),
     ])
     output = stream.render(XMLSerializer)
     # The expected whitespace must mirror the TEXT events above (10 and 8
     # spaces of indentation). It is spelled with explicit escapes so that
     # re-indenting this method cannot silently change the expectation.
     expected = ('<div>\n'
                 '          <x:p xmlns:x="http://example.org/"/>\n'
                 '          <x:p xmlns:x="http://example.org/"/>\n'
                 '        </div>')
     self.assertEqual(expected, output)
Exemple #8
0
 def test_nested_default_namespaces(self):
     """A default namespace re-bound on children is only declared once."""
     stream = Stream([
         (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
         (Stream.START, (QName('http://example.org/}div'), Attrs()), (None, -1, -1)),
         (Stream.TEXT, '\n          ', (None, -1, -1)),
         (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
         (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
         (Stream.END_NS, '', (None, -1, -1)),
         (Stream.TEXT, '\n          ', (None, -1, -1)),
         (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
         (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
         (Stream.END_NS, '', (None, -1, -1)),
         (Stream.TEXT, '\n        ', (None, -1, -1)),
         (Stream.END, QName('http://example.org/}div'), (None, -1, -1)),
         (Stream.END_NS, '', (None, -1, -1))
     ])
     output = stream.render(XMLSerializer)
     # The expected whitespace must mirror the TEXT events above (10 and 8
     # spaces of indentation). It is spelled with explicit escapes so that
     # re-indenting this method cannot silently change the expectation.
     expected = ('<div xmlns="http://example.org/">\n'
                 '          <p/>\n'
                 '          <p/>\n'
                 '        </div>')
     self.assertEqual(expected, output)
Exemple #9
0
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.
    
    >>> from libs.genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written out without escaping
    _NOESCAPE_ELEMS = frozenset([
        QName('script'),
        QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'),
        QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the HTML serializer.
        
        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        """
        super(HTMLSerializer, self).__init__(doctype, False)
        # Replace the filter chain set up by the base class
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(
                WhitespaceFilter(self._PRESERVE_SPACE, self._NOESCAPE_ELEMS))
        self.filters.append(NamespaceStripper('http://www.w3.org/1999/xhtml'))

    def __call__(self, stream):
        """Serialize the given event stream to HTML.

        :param stream: the event stream to serialize
        :return: a generator yielding the output piece by piece
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Boolean attributes are minimized, and left out
                        # entirely when their value is false
                        if value:
                            buf += [' ', attr]
                    else:
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    # Only elements in the empty set may omit the end tag
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield Markup(u''.join(buf))
                # Only a real START opens unescaped content. An EMPTY
                # <script>/<style> has already been closed above, so the
                # text that follows it must still be escaped.
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield Markup('</%s>' % data)
                noescape = False

            elif kind is TEXT:
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written out
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # The template has one placeholder per non-empty part
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
Exemple #10
0
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.
    
    >>> from libs.genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XHTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    # Elements written in the short ``<tag />`` form when they are empty
    _EMPTY_ELEMS = frozenset([
        'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
        'isindex', 'link', 'meta', 'param'
    ])
    # Attributes whose value is always written as the attribute name
    _BOOLEAN_ATTRS = frozenset([
        'selected', 'checked', 'compact', 'declare', 'defer', 'disabled',
        'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap'
    ])
    # Elements handed to the whitespace filter as space-preserving
    _PRESERVE_SPACE = frozenset([
        QName('pre'),
        QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'),
        QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self,
                 doctype=None,
                 strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XHTML serializer.

        :param doctype: the DOCTYPE declaration, passed on to the base class
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: optional mapping handed to the namespace
                                   flattener as ``prefixes``; the XHTML
                                   namespace is always mapped to the empty
                                   prefix. The mapping passed in is not
                                   modified.
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        # Replace the filter chain set up by the base class
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so the caller's mapping is left untouched
        namespace_prefixes = dict(namespace_prefixes or {})
        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))

    def __call__(self, stream):
        """Serialize the given event stream to XHTML.

        :param stream: the event stream to serialize
        :return: a generator yielding the output piece by piece
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        have_doctype = False
        in_cdata = False

        stream = chain(self.preamble, stream)
        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Boolean attributes are written as name="name"
                        value = attr
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    # Only elements in the empty set use the short form
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield Markup(u''.join(buf))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through unescaped
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                # Only the first DOCTYPE event is written out
                name, pubid, sysid = data
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                # The template has one placeholder per non-empty part
                yield Markup(u''.join(buf), *filter(None, data))
                have_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
Exemple #11
0
 def _handle_end(self, tag):
     """Enqueue an END event for the element named ``tag``."""
     closing_name = QName(tag)
     self._enqueue(END, closing_name)
Exemple #12
0
 def _handle_start(self, tag, attrib):
     """Enqueue a START event built from a flat attribute sequence.

     :param tag: the name of the element being opened
     :param attrib: flat sequence alternating attribute names and values;
                    a trailing name without a value is dropped
     """
     flat = iter(attrib)
     # Zipping an iterator with itself consumes the items two at a time
     pairs = [(QName(name), value) for name, value in zip(flat, flat)]
     attrs = Attrs(pairs)
     self._enqueue(START, (QName(tag), attrs))