Example #1
0
 def test_map_element(self):
     """`_map('foo')` gives the (name, attrs) pair, the text and the name."""
     actual = self._map('foo')
     foo_attrs = Attrs([
         (QName('name'), u'foo'),
         (QName('size'), u'100'),
     ])
     expected = [(QName('foo'), foo_attrs), u'FOO', QName('foo')]
     self.assertEqual(actual, expected)
Example #2
0
 def test_pickle(self):
     """An `Attrs` instance keeps its repr across a protocol-2 pickle trip."""
     attrs = Attrs([("attr1", "foo"), ("attr2", "bar")])
     buf = BytesIO()
     pickle.dump(attrs, buf, 2)
     # Rewind so that load() reads what dump() just wrote.
     buf.seek(0)
     unpickled = pickle.load(buf)
     # Use assertEqual: the assertEquals spelling is a deprecated alias that
     # newer unittest releases no longer provide.
     self.assertEqual("Attrs([('attr1', 'foo'), ('attr2', 'bar')])",
                      repr(unpickled))
Example #3
0
 def test_attr_selection_with_namespace(self):
     """A prefixed attribute step is resolved through `namespaces`."""
     document = XML('<root xmlns:ns1="http://example.com">'
                    '<foo ns1:bar="abc"></foo>'
                    '</root>')
     selector = Path('foo/@ns1:bar')
     matches = list(selector.select(
         document, namespaces={'ns1': 'http://example.com'}))
     expected = [Attrs([(QName('http://example.com}bar'), u'abc')])]
     self.assertEqual(matches, expected)
Example #4
0
 def mark_text(self, pos, text, tag):
     """Append `text` wrapped in an attribute-less `tag` element.

     Whatever `cut_leading_space` splits off the front of `text` is
     appended first, as its own TEXT event, so it stays outside the
     element. All events share the position `pos`.
     """
     leading, remainder = self.cut_leading_space(text)
     element = QName(tag)
     events = [
         (START, (element, Attrs())),
         (TEXT, remainder),
         (END, element),
     ]
     if leading:
         events.insert(0, (TEXT, leading))
     for kind, data in events:
         self.append(kind, data, pos)
Example #5
0
 def test_duplicate_attributes(self):
     """Passing both `href` and `href_` produces a single href attribute."""
     no_pos = (None, -1, -1)
     link = tag.a(href='#1', href_='#2')('Bar')
     events = list(link.generate())
     expected = [
         (Stream.START, ('a', Attrs([('href', "#1")])), no_pos),
         (Stream.TEXT, 'Bar', no_pos),
         (Stream.END, 'a', no_pos),
     ]
     for index, wanted in enumerate(expected):
         self.assertEqual(wanted, events[index])
Example #6
0
 def __call__(self, *args, **kwargs):
     """Add child nodes and attributes, then return this object.

     Keyword arguments are converted by `_kwargs_to_attrs` and merged into
     `self.attrib`; positional arguments are handed to
     `Fragment.__call__`, which appends them as child nodes.

     :see: `Fragment.append`
     """
     extra_attrs = Attrs(_kwargs_to_attrs(kwargs))
     self.attrib |= extra_attrs
     Fragment.__call__(self, *args)
     return self
Example #7
0
 def test_link(self):
     """An attribute passed as None does not appear in the START event."""
     no_pos = (None, -1, -1)
     link = tag.a(href='#', title='Foo', accesskey=None)('Bar')
     events = list(link.generate())
     expected_start = (
         Stream.START,
         ('a', Attrs([('href', "#"), ('title', "Foo")])),
         no_pos,
     )
     self.assertEqual(expected_start, events[0])
     self.assertEqual((Stream.TEXT, 'Bar', no_pos), events[1])
     self.assertEqual((Stream.END, 'a', no_pos), events[2])
Example #8
0
 def _generate():
     """Yield events for each chunk, wrapping marked chunks in a span.

     A chunk whose first item is truthy becomes START/TEXT/END for `span`
     with that item as the `class_` attribute value; any other chunk is
     emitted as a bare TEXT event.
     """
     for marker, chunk in self._chunk(tokens):
         if not marker:
             yield TEXT, chunk, pos
             continue
         yield START, (span, Attrs([(class_, marker)])), pos
         yield TEXT, chunk, pos
         yield END, span, pos
Example #9
0
def _kwargs_to_attrs(kwargs):
    """Build an `Attrs` of (QName, text) pairs from keyword arguments.

    Each key has its trailing underscores stripped and its remaining
    underscores turned into hyphens. Entries whose value is None are
    skipped, and only the first entry seen for a normalized name is kept.
    """
    seen = set()
    pairs = []
    for key, value in kwargs.items():
        if value is None:
            continue
        attr_name = key.rstrip('_').replace('_', '-')
        if attr_name in seen:
            continue
        seen.add(attr_name)
        pairs.append((QName(attr_name), unicode(value)))
    return Attrs(pairs)
Example #10
0
 def test_link(self):
     """The link builder emits START, TEXT and END, omitting None attrs."""
     link = tag.a(href='#', title='Foo', accesskey=None)('Bar')
     bits = iter(link.generate())
     # Advance with the builtin next(): the iterator's .next() method only
     # exists on Python 2, while next() works on Python 2.6+ and Python 3.
     self.assertEqual(
         (Stream.START, ('a', Attrs([('href', "#"),
                                     ('title', "Foo")])), (None, -1, -1)),
         next(bits))
     self.assertEqual((Stream.TEXT, u'Bar', (None, -1, -1)), next(bits))
     self.assertEqual((Stream.END, 'a', (None, -1, -1)), next(bits))
 def test_nonstring_attributes(self):
     """
     Verify that an attribute value given as an int (or some other
     non-string type) is converted to a string when the stream is
     generated.
     """
     first_event = list(tag.foo(id=3))[0]
     expected = (Stream.START, ('foo', Attrs([('id', '3')])), (None, -1, -1))
     self.assertEqual(expected, first_event)
Example #12
0
def to_genshi(walker):
    """Convert a tree to a genshi tree

    Consecutive character tokens are buffered and emitted as one TEXT event
    as soon as a token of another kind arrives (or the walker is exhausted).
    An "EmptyTag" token produces a START event immediately followed by the
    matching END event. Token types that are not recognised are ignored.

    :arg walker: the treewalker to use to walk the tree to convert it

    :returns: generator of genshi nodes

    """
    no_pos = (None, -1, -1)

    def qualified(token):
        # "{namespace}name" when the token has a namespace, else the bare name.
        namespace = token["namespace"]
        if namespace:
            return "{%s}%s" % (namespace, token["name"])
        return token["name"]

    pending = []
    for token in walker:
        kind = token["type"]
        if kind in ("Characters", "SpaceCharacters"):
            pending.append(token["data"])
        elif pending:
            yield TEXT, "".join(pending), no_pos
            pending = []

        if kind in ("StartTag", "EmptyTag"):
            element = qualified(token)
            pairs = []
            for attr, value in token["data"].items():
                # Attribute keys are (namespace, name) pairs.
                if attr[0] is not None:
                    attr_name = "{%s}%s" % attr
                else:
                    attr_name = attr[1]
                pairs.append((QName(attr_name), value))
            yield (START, (QName(element), Attrs(pairs)), no_pos)
            if kind == "EmptyTag":
                # Fall through to the EndTag branch below for the END event.
                kind = "EndTag"

        if kind == "EndTag":
            yield END, QName(qualified(token)), no_pos
        elif kind == "Comment":
            yield COMMENT, token["data"], no_pos
        elif kind == "Doctype":
            doctype = (token["name"], token["publicId"], token["systemId"])
            yield DOCTYPE, doctype, no_pos
        # FIXME: any other token type is currently dropped silently.

    if pending:
        yield TEXT, "".join(pending), no_pos
Example #13
0
 def test_empty_text_in_span(self):
     """
     A span that holds only an empty TEXT event groups into zero lines.

     http://trac.edgewall.org/ticket/4336
     """
     no_pos = (None, -1, -1)
     span = Namespace('http://www.w3.org/1999/xhtml').span
     events = [
         (START, (span, Attrs([])), no_pos),
         (TEXT, "", no_pos),
         (END, span, no_pos),
     ]
     grouped = list(_group_lines(events))
     self.assertEqual(len(grouped), 0)
Example #14
0
 def test_out_of_order_tags2(self):
     """Mis-nested closing tags come out as properly nested END events."""
     markup = '<span class="baz"><b><i>Foobar</span></b></i>'
     events = list(HTMLParser(StringIO(markup)))
     expected = [
         (Stream.START, ('span', Attrs([('class', 'baz')]))),
         (Stream.START, ('b', ())),
         (Stream.START, ('i', ())),
         (Stream.TEXT, 'Foobar'),
         (Stream.END, 'i'),
         (Stream.END, 'b'),
         (Stream.END, 'span'),
     ]
     self.assertEqual(7, len(events))
     # Only kind and data are compared; the position item is ignored.
     for wanted, event in zip(expected, events):
         self.assertEqual(wanted, event[:2])
Example #15
0
 def test_multiple_bound_namespaces(self):
     """Sibling elements that each bind prefix `x` each declare xmlns:x."""
     no_pos = (None, -1, -1)
     # The inter-element whitespace is defined once and reused for the
     # expected output. The expected text used to be a triple-quoted literal
     # whose indentation followed the source file rather than these TEXT
     # events, so the two no longer agreed and the comparison could not
     # succeed.
     inner = '\n' + ' ' * 10
     outer = '\n' + ' ' * 8
     stream = Stream([
         (Stream.START, (QName('div'), Attrs()), no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('x', 'http://example.org/'), no_pos),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), no_pos),
         (Stream.END, QName('http://example.org/}p'), no_pos),
         (Stream.END_NS, 'x', no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('x', 'http://example.org/'), no_pos),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), no_pos),
         (Stream.END, QName('http://example.org/}p'), no_pos),
         (Stream.END_NS, 'x', no_pos),
         (Stream.TEXT, outer, no_pos),
         (Stream.END, QName('div'), no_pos),
     ])
     output = stream.render(XMLSerializer, encoding=None)
     expected = ''.join([
         '<div>', inner,
         '<x:p xmlns:x="http://example.org/"/>', inner,
         '<x:p xmlns:x="http://example.org/"/>', outer,
         '</div>',
     ])
     self.assertEqual(expected, output)
    def handle_starttag(self, tag, attrib):
        """Queue a START event for `tag`, closing empty elements at once.

        A minimized attribute (value None) takes its own name as its value.
        Every value is passed through `stripentities`. Tags listed in
        `_EMPTY_ELEMS` get their END event queued immediately; any other tag
        is pushed onto `_open_tags`.
        """
        fixed_attrib = [
            (QName(name), stripentities(name if value is None else value))
            for name, value in attrib
        ]
        self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
        if tag not in self._EMPTY_ELEMS:
            self._open_tags.append(tag)
        else:
            self._enqueue(END, QName(tag))
Example #17
0
    def _eval(self, stream, ctxt, **vars):
        """Internal stream filter that evaluates expressions found in the
        attributes of `START` events and in `EXPR` events.

        For a `START` event with attributes, each attribute value that is not
        already a string is treated as a substream: it is run through
        `_eval` recursively and the data of the resulting `TEXT` events is
        joined into the final value. An attribute whose substream produces no
        non-None text is dropped.

        For an `EXPR` event, the expression is evaluated with `_eval_expr`
        and the result is emitted as a `TEXT` event (strings, numbers, other
        objects) or as the events of a substream (iterables). A result of
        None emits nothing.

        All other events are passed through unchanged.

        :param stream: iterable of (kind, data, pos) events
        :param ctxt: context passed on to `_eval_expr` and to the filters
        :param vars: extra keyword arguments passed on in the same way
        """
        # Filters applied, in this order, to substreams produced by
        # expressions that evaluate to an iterable.
        filters = (self._flatten, self._eval)
        number_conv = self._number_conv

        for kind, data, pos in stream:

            # Only START events that actually carry attributes need work.
            if kind is START and data[1]:
                # Attributes may still contain expressions in start tags at
                # this point, so do some evaluation
                tag, attrs = data
                new_attrs = []
                for name, substream in attrs:
                    if isinstance(substream, basestring):
                        value = substream
                    else:
                        values = []
                        # Recursively evaluate the attribute's substream and
                        # keep only the text it produces.
                        for subkind, subdata, subpos in self._eval(substream,
                                                                   ctxt,
                                                                   **vars):
                            if subkind is TEXT:
                                values.append(subdata)
                        value = [x for x in values if x is not None]
                        if not value:
                            # Nothing left to render: omit the attribute.
                            continue
                    # `value` is either a string or a list of strings here;
                    # joining yields the same text in both cases.
                    new_attrs.append((name, u''.join(value)))
                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is EXPR:
                result = _eval_expr(data, ctxt, **vars)
                if result is not None:
                    # First check for a string, otherwise the iterable test
                    # below succeeds, and the string will be chopped up into
                    # individual characters
                    if isinstance(result, basestring):
                        yield TEXT, result, pos
                    elif isinstance(result, (int, float, long)):
                        yield TEXT, number_conv(result), pos
                    elif hasattr(result, '__iter__'):
                        # Turn the iterable into a stream and run it through
                        # the same filters before emitting its events.
                        substream = _ensure(result)
                        for filter_ in filters:
                            substream = filter_(substream, ctxt, **vars)
                        for event in substream:
                            yield event
                    else:
                        yield TEXT, unicode(result), pos

            else:
                yield kind, data, pos
Example #18
0
    def handle_starttag(self, tag, attrib):
        """Queue a START event for `tag`, closing empty elements at once.

        Attribute values are normalized first: a minimized attribute (value
        None) uses `str(name)` as its value, and a value that is not a `str`
        is decoded with `self.encoding`, replacing undecodable input. Every
        value then goes through `stripentities`.
        """
        fixed_attrib = []
        for name, value in attrib:
            if value is None:
                text = str(name)
            elif isinstance(value, str):
                text = value
            else:
                text = value.decode(self.encoding, 'replace')
            fixed_attrib.append((QName(name), stripentities(text)))

        self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
        if tag not in self._EMPTY_ELEMS:
            self._open_tags.append(tag)
        else:
            self._enqueue(END, QName(tag))
Example #19
0
 def test_nested_default_namespaces(self):
     """A default namespace re-bound on children is declared only once."""
     no_pos = (None, -1, -1)
     # The inter-element whitespace is defined once and reused for the
     # expected output. The expected text used to be a triple-quoted literal
     # whose indentation followed the source file rather than these TEXT
     # events, so the two no longer agreed and the comparison could not
     # succeed.
     inner = '\n' + ' ' * 10
     outer = '\n' + ' ' * 8
     stream = Stream([
         (Stream.START_NS, ('', 'http://example.org/'), no_pos),
         (Stream.START, (QName('http://example.org/}div'), Attrs()), no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('', 'http://example.org/'), no_pos),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), no_pos),
         (Stream.END, QName('http://example.org/}p'), no_pos),
         (Stream.END_NS, '', no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('', 'http://example.org/'), no_pos),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), no_pos),
         (Stream.END, QName('http://example.org/}p'), no_pos),
         (Stream.END_NS, '', no_pos),
         (Stream.TEXT, outer, no_pos),
         (Stream.END, QName('http://example.org/}div'), no_pos),
         (Stream.END_NS, '', no_pos)
     ])
     output = stream.render(XMLSerializer, encoding=None)
     expected = ''.join([
         '<div xmlns="http://example.org/">', inner,
         '<p/>', inner,
         '<p/>', outer,
         '</div>',
     ])
     self.assertEqual(expected, output)
Example #20
0
 def test_nested_bound_namespaces(self):
     """A prefix re-bound on children is declared only on the outer element."""
     no_pos = (None, -1, -1)
     # The inter-element whitespace is defined once and reused for the
     # expected output. The expected text used to be a triple-quoted literal
     # whose indentation followed the source file rather than these TEXT
     # events, so the two no longer agreed and the comparison could not
     # succeed.
     inner = '\n' + ' ' * 10
     outer = '\n' + ' ' * 8
     stream = Stream([
         (Stream.START_NS, ('x', 'http://example.org/'), no_pos),
         (Stream.START, (QName('div'), Attrs()), no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('x', 'http://example.org/'), no_pos),
         (Stream.START, (QName('p'), Attrs()), no_pos),
         (Stream.END, QName('p'), no_pos),
         (Stream.END_NS, 'x', no_pos),
         (Stream.TEXT, inner, no_pos),
         (Stream.START_NS, ('x', 'http://example.org/'), no_pos),
         (Stream.START, (QName('p'), Attrs()), no_pos),
         (Stream.END, QName('p'), no_pos),
         (Stream.END_NS, 'x', no_pos),
         (Stream.TEXT, outer, no_pos),
         (Stream.END, QName('div'), no_pos),
         (Stream.END_NS, 'x', no_pos)
     ])
     output = stream.render(XHTMLSerializer)
     expected = ''.join([
         '<div xmlns:x="http://example.org/">', inner,
         '<p></p>', inner,
         '<p></p>', outer,
         '</div>',
     ])
     self.assertEqual(expected, output)
    def GenshiAdapter(tree):
        """Yield Genshi events for the tokens of the "dom" tree walker.

        Consecutive character tokens are merged into one TEXT event, which is
        emitted when a token of another kind arrives or the walker ends. An
        "EmptyTag" token yields START immediately followed by END. Token
        types that are not recognised are ignored.
        """
        no_pos = (None, -1, -1)

        def qualified(token):
            # "{namespace}name" when a namespace is set, else the bare name.
            if token["namespace"]:
                return "{%s}%s" % (token["namespace"], token["name"])
            return token["name"]

        pending = None
        for token in treewalkers.getTreeWalker("dom")(tree):
            kind = token["type"]
            if kind in ("Characters", "SpaceCharacters"):
                if pending is not None:
                    pending += token["data"]
                else:
                    pending = token["data"]
            elif pending is not None:
                yield TEXT, pending, no_pos
                pending = None

            if kind in ("StartTag", "EmptyTag"):
                element = qualified(token)
                pairs = []
                for attr, value in token["data"].items():
                    # Attribute keys are (namespace, name) pairs.
                    if attr[0] is not None:
                        attr_name = "{%s}%s" % attr
                    else:
                        attr_name = attr[1]
                    pairs.append((QName(attr_name), value))
                yield (START, (QName(element), Attrs(pairs)), no_pos)
                if kind == "EmptyTag":
                    # Let the EndTag branch below emit the END event.
                    kind = "EndTag"

            if kind == "EndTag":
                yield END, QName(qualified(token)), no_pos
            elif kind == "Comment":
                yield COMMENT, token["data"], no_pos
            elif kind == "Doctype":
                doctype = (token["name"], token["publicId"],
                           token["systemId"])
                yield DOCTYPE, doctype, no_pos
            # FIXME: any other token type is currently dropped silently.

        if pending is not None:
            yield TEXT, pending, no_pos
Example #22
0
    def GenshiAdapter(tree):
        """Convert the 'dom' tree walker's tokens for `tree` to Genshi events.

        Runs of character tokens are concatenated and flushed as a single
        TEXT event before the next non-character token (and once more at the
        end). Start and empty tags become START events, with empty tags also
        producing the matching END; unknown token types yield nothing.
        """
        position = (None, -1, -1)
        text_kinds = ('Characters', 'SpaceCharacters')
        open_kinds = ('StartTag', 'EmptyTag')
        buffered = None

        for token in treewalkers.getTreeWalker('dom')(tree):
            token_type = token['type']

            if token_type in text_kinds:
                if buffered is None:
                    buffered = token['data']
                else:
                    buffered += token['data']
            elif buffered is not None:
                yield TEXT, buffered, position
                buffered = None

            if token_type in open_kinds:
                if token['namespace']:
                    tag_name = '{%s}%s' % (token['namespace'], token['name'])
                else:
                    tag_name = token['name']
                # Attribute keys are (namespace, name) pairs; qualify the
                # name only when a namespace is present.
                attr_list = [
                    (QName(attr[1] if attr[0] is None else '{%s}%s' % attr),
                     value)
                    for attr, value in token['data'].items()
                ]
                yield (START, (QName(tag_name), Attrs(attr_list)), position)
                if token_type == 'EmptyTag':
                    token_type = 'EndTag'

            if token_type == 'EndTag':
                if token['namespace']:
                    tag_name = '{%s}%s' % (token['namespace'], token['name'])
                else:
                    tag_name = token['name']
                yield END, QName(tag_name), position
            elif token_type == 'Comment':
                yield COMMENT, token['data'], position
            elif token_type == 'Doctype':
                yield DOCTYPE, (token['name'], token['publicId'],
                                token['systemId']), position
            # FIXME: other token types are ignored.

        if buffered is not None:
            yield TEXT, buffered, position
    def _interpolate_attrs(self, stream):
        """Run `interpolate` over the attribute values of every START event.

        Each non-empty attribute value is replaced by the list of events that
        `interpolate` returns for it, except when that list is a single TEXT
        event, in which case the plain text is kept. Empty values and all
        non-START events pass through unchanged.
        """
        for kind, data, pos in stream:
            if kind is not START:
                yield kind, data, pos
                continue

            tag, attrs = data
            new_attrs = []
            for name, value in attrs:
                if value:
                    parts = list(interpolate(value, self.filepath, pos[1],
                                             pos[2], lookup=self.lookup))
                    if len(parts) == 1 and parts[0][0] is TEXT:
                        value = parts[0][1]
                    else:
                        value = parts
                new_attrs.append((name, value))
            yield kind, (tag, Attrs(new_attrs)), pos
Example #24
0
    def __call__(self, stream):
        """Apply the filter to the given stream.

        Elements rejected by `is_safe_elem` are removed together with
        everything up to their END event. For the elements that are kept,
        only attributes listed in `safe_attrs` survive; attributes listed in
        `uri_attrs` must also pass `is_safe_uri`, and `style` values are
        reduced to the declarations returned by `sanitize_css`. COMMENT
        events are always dropped.

        :param stream: the markup event stream to filter
        """
        # Tag of the unsafe element currently being skipped, or None.
        waiting_for = None

        for kind, data, pos in stream:
            if kind is START:
                if waiting_for:
                    # Inside an unsafe element: drop nested elements too.
                    continue
                tag, attrs = data
                if not self.is_safe_elem(tag, attrs):
                    waiting_for = tag
                    continue

                new_attrs = []
                for attr, value in attrs:
                    value = stripentities(value)
                    if attr not in self.safe_attrs:
                        continue
                    elif attr in self.uri_attrs:
                        # Don't allow URI schemes such as "javascript:"
                        if not self.is_safe_uri(value):
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self.sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrs.append((attr, value))

                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is END:
                tag = data
                if waiting_for:
                    # NOTE(review): suppression ends at the first END whose
                    # tag equals `waiting_for`, so an unsafe element nested
                    # inside another of the same name would end it early --
                    # confirm whether input streams can contain that.
                    if waiting_for == tag:
                        waiting_for = None
                else:
                    yield kind, data, pos

            elif kind is not COMMENT:
                # Every other non-comment event is passed through unless it
                # lies inside a skipped element.
                if not waiting_for:
                    yield kind, data, pos
Example #25
0
 def block_process(self, events):
     """Dispatch each (kind, data, pos) event to enter/leave/append.

     START events go to `enter` and END events to `leave`. A TEXT event
     with non-whitespace content is wrapped in an attribute-less element
     named after `self._context` when a context is set. Everything else is
     appended unchanged.
     """
     for kind, data, pos in events:
         if kind == START:
             self.enter(pos, *data)
         elif kind == END:
             self.leave(pos, data)
         elif kind == TEXT and self._context is not None and data.strip():
             wrapper = QName(self._context)
             self.append(START, (QName(wrapper), Attrs()), pos)
             self.append(kind, data, pos)
             self.append(END, wrapper, pos)
         else:
             self.append(kind, data, pos)
Example #26
0
def ET(element):
    """Convert a given ElementTree element to a markup stream.

    The element's START event is followed by its text (if any), the events
    of each direct child in document order (recursively), its END event and
    finally its tail text (if any). Every event uses the position
    ``(None, -1, -1)``.

    :param element: an ElementTree element
    :return: a markup stream
    """
    # Leading "{" characters are stripped from tag and attribute names
    # before they are handed to QName.
    tag_name = QName(element.tag.lstrip('{'))
    attrs = Attrs([(QName(attr.lstrip('{')), value)
                   for attr, value in element.items()])

    yield START, (tag_name, attrs), (None, -1, -1)
    if element.text:
        yield TEXT, element.text, (None, -1, -1)
    # Iterate the element itself: Element.getchildren() was deprecated and
    # later removed from xml.etree.ElementTree, whereas iteration yields the
    # same direct children on every version.
    for child in element:
        for item in ET(child):
            yield item
    yield END, tag_name, (None, -1, -1)
    if element.tail:
        yield TEXT, element.tail, (None, -1, -1)
Example #27
0
 def filter_(stream):
     """Rewrite link targets and optionally inject an image before </body>.

     The `href` of every `a` START event is replaced by the result of
     `short(href, str(message))`. When `spy_pixel` is truthy, an `img`
     element of height and width 1 whose `src` is
     `short(URL_OPEN, str(message))` is emitted just before the END event
     of `body`. All events are otherwise passed through.
     """
     for kind, data, pos in stream:
         if kind is START:
             tag, attrs = data
             if tag == 'a':
                 original = attrs.get('href')
                 attrs -= 'href'
                 shortened = short(original, str(message))
                 attrs |= [(QName('href'), shortened)]
                 data = tag, attrs
         elif kind is END and data == 'body' and spy_pixel:
             pixel_attrs = Attrs([
                 (QName('src'), short(URL_OPEN, str(message))),
                 (QName('height'), '1'),
                 (QName('width'), '1'),
             ])
             yield START, (QName('img'), pixel_attrs), pos
             yield END, QName('img'), pos
         yield kind, data, pos
Example #28
0
    def GenshiAdapter(tree):
        """Yield Genshi events for the tokens of the "simpletree" walker.

        Consecutive character tokens are merged into one TEXT event, which is
        emitted when a token of another kind arrives or the walker ends.
        START events use the namespace-qualified element name, while END
        events use the bare token name. Unknown token types are ignored.
        """
        no_pos = (None, -1, -1)
        pending = None
        for token in treewalkers.getTreeWalker(u"simpletree")(tree):
            kind = token[u"type"]
            if kind in (u"Characters", u"SpaceCharacters"):
                if pending is not None:
                    pending += token[u"data"]
                else:
                    pending = token[u"data"]
            elif pending is not None:
                yield TEXT, pending, no_pos
                pending = None

            if kind in (u"StartTag", u"EmptyTag"):
                if token[u"namespace"]:
                    element = u"{%s}%s" % (token[u"namespace"], token[u"name"])
                else:
                    element = token[u"name"]
                pairs = []
                for attr, value in token[u"data"]:
                    pairs.append((QName(attr), value))
                yield (START, (QName(element), Attrs(pairs)), no_pos)
                if kind == u"EmptyTag":
                    # Let the EndTag branch below emit the END event.
                    kind = u"EndTag"

            if kind == u"EndTag":
                yield END, QName(token[u"name"]), no_pos
            elif kind == u"Comment":
                yield COMMENT, token[u"data"], no_pos
            elif kind == u"Doctype":
                doctype = (token[u"name"], token[u"publicId"],
                           token[u"systemId"])
                yield DOCTYPE, doctype, no_pos
            # FIXME: any other token type is currently dropped silently.

        if pending is not None:
            yield TEXT, pending, no_pos
Example #29
0
def GenshiAdapter(treewalker, tree):
    """Generator to convert html5lib treewalker tokens into Genshi
    stream tokens.

    `treewalker(tree)` supplies the tokens. Consecutive character tokens are
    merged into one TEXT event; an "EmptyTag" token yields START immediately
    followed by END; a Doctype yields only its name, with None for the
    remaining two fields. Unknown token types are ignored."""
    no_pos = (None, -1, -1)
    pending = None
    for token in treewalker(tree):
        kind = token["type"]
        if kind in ("Characters", "SpaceCharacters"):
            if pending is not None:
                pending += token["data"]
            else:
                pending = token["data"]
        elif pending is not None:
            assert type(pending) in (unicode, None)
            yield TEXT, pending, no_pos
            pending = None

        if kind in ("StartTag", "EmptyTag"):
            pairs = []
            for attr, value in token["data"]:
                pairs.append((QName(attr), value))
            yield (START, (QName(token["name"]), Attrs(pairs)), no_pos)
            if kind == "EmptyTag":
                # Let the EndTag branch below emit the END event.
                kind = "EndTag"

        if kind == "EndTag":
            yield END, QName(token["name"]), no_pos
        elif kind == "Comment":
            yield COMMENT, token["data"], no_pos
        elif kind == "Doctype":
            yield DOCTYPE, (token["name"], None, None), no_pos
        # FIXME: any other token type is currently dropped silently.

    if pending is not None:
        yield TEXT, pending, no_pos
Example #30
0
    def close_diff(self):
        """
        Close the piece of HTML currently marked as a diff.

        If every buffered event belonged to the same operation, a regular
        diff node is rendered for the current element. Otherwise the diffed
        element also contains original content, so the change is treated as
        a formatting change and a formatting diff node is rendered from the
        first buffered event, which must be a START event.

        Diffed sections are rendered lazily: the opening diff tag, the
        buffered events and the closing tag are appended to the result
        stream here, after which the buffer is reset.
        """
        # The first buffered event is the element the diff should wrap; it
        # may be a text node or a tag.
        first = self._buffer[0]
        if not self._all_same:
            # A text node is not valid here: text is diffed word by word, so
            # a partially changed context must start with a tag.
            assert first[0] == START
            formatting_node = DOMNode(name=first[1][0], attrs=first[1][1])
            render = getattr(DefaultDiffProducer,
                             'render_formatting_%s' % self.operation)
            node = render(self.get_current_element(), formatting_node)
        else:
            # Every buffered event came from the same operation.
            render = getattr(DefaultDiffProducer,
                             'render_%s' % self.operation)
            node = render(self.get_current_element())

        message = "Lazy diff '%s' of %d nodes marked using %r" % (
            self.operation, len(self._buffer), node)
        logger.debug(message)

        opening = (START, (QName(node.name), Attrs(node.attrs)), None)
        self._result.append(opening)
        self._result.extend(self._buffer)
        closing = (END, QName(node.name), None)
        self._result.append(closing)

        self._rendered = False
        self._buffer = []