def test_map_element(self):
    """Compare the result of ``self._map('foo')`` with the expected list.

    The expected list holds a ``(QName, Attrs)`` pair, the text ``u'FOO'``
    and a closing ``QName('foo')``.
    """
    mapped = self._map('foo')
    element_name = QName('foo')
    element_attrs = Attrs([
        (QName('name'), u'foo'),
        (QName('size'), u'100'),
    ])
    expected = [(element_name, element_attrs), u'FOO', QName('foo')]
    self.assertEqual(mapped, expected)
def test_pickle(self):
    """Round-trip an ``Attrs`` instance through pickle protocol 2.

    The attributes are dumped into an in-memory buffer, loaded back, and
    the ``repr`` of the restored object is compared with the expected text.
    """
    attrs = Attrs([("attr1", "foo"), ("attr2", "bar")])
    buf = BytesIO()
    pickle.dump(attrs, buf, 2)
    # Rewind so that load() reads from the start of what was just written.
    buf.seek(0)
    unpickled = pickle.load(buf)
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual("Attrs([('attr1', 'foo'), ('attr2', 'bar')])",
                     repr(unpickled))
def test_attr_selection_with_namespace(self):
    """Select a prefixed attribute with the path ``foo/@ns1:bar``.

    The ``ns1`` prefix is bound through the ``namespaces`` argument of
    ``Path.select``; the selection is expected to contain a single
    ``Attrs`` holding the namespace-qualified ``bar`` attribute.
    """
    document = XML('<root xmlns:ns1="http://example.com">'
                   '<foo ns1:bar="abc"></foo>'
                   '</root>')
    selector = Path('foo/@ns1:bar')
    selected = selector.select(document,
                               namespaces={'ns1': 'http://example.com'})
    actual = list(selected)
    expected = [Attrs([(QName('http://example.com}bar'), u'abc')])]
    self.assertEqual(actual, expected)
def mark_text(self, pos, text, tag):
    """Append ``text`` wrapped in a ``tag`` element.

    ``self.cut_leading_space`` splits ``text`` in two; when the first part
    is truthy it is appended as a separate TEXT event *before* the START
    event, so it stays outside of the wrapping element.

    :param pos: position passed along with every appended event
    :param text: the text to wrap
    :param tag: name of the wrapping element (converted to a ``QName``)
    """
    leading, remainder = self.cut_leading_space(text)
    element = QName(tag)
    if leading:
        self.append(TEXT, leading, pos)
    wrapped = (
        (START, (element, Attrs())),
        (TEXT, remainder),
        (END, element),
    )
    for kind, payload in wrapped:
        self.append(kind, payload, pos)
def test_duplicate_attributes(self):
    """Build ``<a>`` with both ``href`` and ``href_`` keyword arguments.

    The START event is expected to carry a single ``href`` attribute with
    the value ``#1``.
    """
    nowhere = (None, -1, -1)
    element = tag.a(href='#1', href_='#2')('Bar')
    produced = list(element.generate())
    expected = [
        (Stream.START, ('a', Attrs([('href', "#1")])), nowhere),
        (Stream.TEXT, 'Bar', nowhere),
        (Stream.END, 'a', nowhere),
    ]
    for index, event in enumerate(expected):
        self.assertEqual(event, produced[index])
def __call__(self, *args, **kwargs):
    """Add children and attributes to this element, then return it.

    Keyword arguments are converted with ``_kwargs_to_attrs`` and merged
    into ``self.attrib``; positional arguments are handed on to
    ``Fragment.__call__``.

    :see: `Fragment.append`
    :return: ``self``, so that calls can be chained
    """
    extra_attrs = Attrs(_kwargs_to_attrs(kwargs))
    self.attrib |= extra_attrs
    Fragment.__call__(self, *args)
    return self
def test_link(self):
    """Build an ``<a>`` element and check the three events it generates.

    ``accesskey=None`` is passed to the builder but is not expected among
    the attributes of the START event.
    """
    nowhere = (None, -1, -1)
    element = tag.a(href='#', title='Foo', accesskey=None)('Bar')
    produced = list(element.generate())
    expected = [
        (Stream.START, ('a', Attrs([('href', "#"), ('title', "Foo")])),
         nowhere),
        (Stream.TEXT, 'Bar', nowhere),
        (Stream.END, 'a', nowhere),
    ]
    for index, event in enumerate(expected):
        self.assertEqual(event, produced[index])
def _generate():
    """Yield stream events for every chunk of ``self._chunk(tokens)``.

    A chunk whose first item is truthy is wrapped in START/END events of
    ``span`` carrying a single ``class_`` attribute with that value; any
    other chunk is emitted as a bare TEXT event.  ``self``, ``tokens``,
    ``class_``, ``span`` and ``pos`` come from the enclosing scope.
    """
    for marker, chunk in self._chunk(tokens):
        if not marker:
            yield TEXT, chunk, pos
            continue
        yield START, (span, Attrs([(class_, marker)])), pos
        yield TEXT, chunk, pos
        yield END, span, pos
def _kwargs_to_attrs(kwargs):
    """Turn a keyword-argument mapping into an ``Attrs`` instance.

    Each name has its trailing underscores stripped and its remaining
    underscores replaced by hyphens.  Entries whose value is ``None`` are
    skipped, and only the first entry seen for a given converted name is
    kept.  Values are converted with ``unicode()``.

    :param kwargs: mapping of attribute names to values
    :return: the resulting ``Attrs``
    """
    collected = []
    seen = set()
    for raw_name, value in kwargs.items():
        attr_name = raw_name.rstrip('_').replace('_', '-')
        if value is None or attr_name in seen:
            continue
        collected.append((QName(attr_name), unicode(value)))
        seen.add(attr_name)
    return Attrs(collected)
def test_link(self):
    """Build an ``<a>`` element and check its events one at a time.

    ``accesskey=None`` is passed to the builder but is not expected among
    the attributes of the START event.
    """
    link = tag.a(href='#', title='Foo', accesskey=None)('Bar')
    bits = iter(link.generate())
    # The built-in next() is used instead of the Python 2-only .next()
    # iterator method; it behaves the same and also exists on Python 3.
    self.assertEqual(
        (Stream.START, ('a', Attrs([('href', "#"), ('title', "Foo")])),
         (None, -1, -1)),
        next(bits))
    self.assertEqual((Stream.TEXT, u'Bar', (None, -1, -1)), next(bits))
    self.assertEqual((Stream.END, 'a', (None, -1, -1)), next(bits))
def test_nonstring_attributes(self):
    """
    Verify that if an attribute value is given as an int (or some other
    non-string type), it is converted to a string when the stream is
    generated.
    """
    generated = list(tag.foo(id=3))
    expected_start = (
        Stream.START,
        ('foo', Attrs([('id', '3')])),
        (None, -1, -1),
    )
    self.assertEqual(expected_start, generated[0])
def to_genshi(walker):
    """Convert a tree to a genshi tree

    Consecutive character tokens are buffered and emitted as one TEXT
    event as soon as a token of another type is seen (or the walker is
    exhausted).

    :arg walker: the treewalker to use to walk the tree to convert it

    :returns: generator of genshi nodes

    """
    nowhere = (None, -1, -1)

    def element_name(token):
        # ``{namespace}name`` when the token has a truthy namespace
        namespace = token["namespace"]
        if namespace:
            return "{%s}%s" % (namespace, token["name"])
        return token["name"]

    def attribute_name(key):
        # ``key`` is a (namespace, name) pair; namespace may be None
        if key[0] is not None:
            return QName("{%s}%s" % key)
        return QName(key[1])

    pending = []
    for token in walker:
        kind = token["type"]
        if kind in ("Characters", "SpaceCharacters"):
            pending.append(token["data"])
        elif pending:
            yield TEXT, "".join(pending), nowhere
            pending = []

        if kind in ("StartTag", "EmptyTag"):
            attrs = Attrs([
                (attribute_name(key), value)
                for key, value in token["data"].items()
            ])
            yield (START, (QName(element_name(token)), attrs), nowhere)
            if kind == "EmptyTag":
                # An empty tag is closed right away by the branch below
                kind = "EndTag"

        if kind == "EndTag":
            yield END, QName(element_name(token)), nowhere
        elif kind == "Comment":
            yield COMMENT, token["data"], nowhere
        elif kind == "Doctype":
            doctype = (token["name"], token["publicId"], token["systemId"])
            yield DOCTYPE, doctype, nowhere
        # FIXME: What to do with any other token type?  Ignored for now.

    if pending:
        yield TEXT, "".join(pending), nowhere
def test_empty_text_in_span(self):
    """
    A span containing only an empty TEXT event produces no lines.

    See http://trac.edgewall.org/ticket/4336
    """
    xhtml = Namespace('http://www.w3.org/1999/xhtml')
    nowhere = (None, -1, -1)
    span = xhtml.span
    events = [
        (START, (span, Attrs([])), nowhere),
        (TEXT, "", nowhere),
        (END, span, nowhere),
    ]
    grouped = list(_group_lines(events))
    self.assertEqual(len(grouped), 0)
def test_out_of_order_tags2(self):
    """Parse markup whose closing tags are out of order.

    Seven events are expected, with the END events in properly nested
    order (``i``, ``b``, ``span``).  Only the kind and data of each event
    are compared, not its position.
    """
    markup = '<span class="baz"><b><i>Foobar</span></b></i>'
    parsed = list(HTMLParser(StringIO(markup)))
    self.assertEqual(7, len(parsed))
    expected = [
        (Stream.START, ('span', Attrs([('class', 'baz')]))),
        (Stream.START, ('b', ())),
        (Stream.START, ('i', ())),
        (Stream.TEXT, 'Foobar'),
        (Stream.END, 'i'),
        (Stream.END, 'b'),
        (Stream.END, 'span'),
    ]
    for index, kind_and_data in enumerate(expected):
        self.assertEqual(kind_and_data, parsed[index][:2])
def test_multiple_bound_namespaces(self):
    """Serialize sibling elements that each bind the prefix ``x``.

    The ``x`` prefix is opened and closed around each of the two ``p``
    elements, so the expected output repeats the ``xmlns:x`` declaration
    on both of them.
    """
    # NOTE(review): the whitespace in the TEXT events below has to agree
    # with the whitespace inside the expected literal -- confirm.
    stream = Stream([
        (Stream.START, (QName('div'), Attrs()), (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # First binding of the prefix, scoped to the first <p>
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Second, independent binding of the same prefix
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        (Stream.END, QName('div'), (None, -1, -1)),
    ])
    output = stream.render(XMLSerializer, encoding=None)
    self.assertEqual("""<div> <x:p xmlns:x="http://example.org/"/> <x:p xmlns:x="http://example.org/"/> </div>""", output)
def handle_starttag(self, tag, attrib):
    """Enqueue a START event for an opening tag.

    Attribute values are passed through ``stripentities``; a minimized
    attribute (value ``None``) takes its own name as the value.  Tags
    listed in ``self._EMPTY_ELEMS`` get an END event immediately; all
    other tags are recorded in ``self._open_tags``.

    :param tag: the tag name
    :param attrib: iterable of ``(name, value)`` pairs
    """
    pairs = []
    for attr_name, raw in attrib:
        # Fixup minimized attributes
        text = attr_name if raw is None else raw
        pairs.append((QName(attr_name), stripentities(text)))

    self._enqueue(START, (QName(tag), Attrs(pairs)))
    if tag not in self._EMPTY_ELEMS:
        self._open_tags.append(tag)
    else:
        self._enqueue(END, QName(tag))
def _eval(self, stream, ctxt, **vars):
    """Internal stream filter that evaluates any expressions in `START` and
    `TEXT` events.

    :param stream: iterable of ``(kind, data, pos)`` events
    :param ctxt: context passed on to ``_eval_expr`` and the nested filters
    :param vars: extra keyword arguments forwarded unchanged
    :return: generator of events; `EXPR` events are replaced by the events
             their result produces, or dropped when the result is ``None``
    """
    # Filters applied, in this order, to iterable expression results
    filters = (self._flatten, self._eval)
    number_conv = self._number_conv

    for kind, data, pos in stream:

        if kind is START and data[1]:
            # Attributes may still contain expressions in start tags at
            # this point, so do some evaluation
            tag, attrs = data
            new_attrs = []
            for name, substream in attrs:
                if isinstance(substream, basestring):
                    value = substream
                else:
                    # Non-string values are evaluated recursively as
                    # substreams; only the data of TEXT events is kept
                    values = []
                    for subkind, subdata, subpos in self._eval(substream,
                                                               ctxt,
                                                               **vars):
                        if subkind is TEXT:
                            values.append(subdata)
                    value = [x for x in values if x is not None]
                    # NOTE(review): the skip is taken to apply only to
                    # evaluated substreams, so that literal empty-string
                    # attributes are kept -- confirm.
                    if not value:
                        continue
                new_attrs.append((name, u''.join(value)))
            yield kind, (tag, Attrs(new_attrs)), pos

        elif kind is EXPR:
            result = _eval_expr(data, ctxt, **vars)
            if result is not None:
                # First check for a string, otherwise the iterable test
                # below succeeds, and the string will be chopped up into
                # individual characters
                if isinstance(result, basestring):
                    yield TEXT, result, pos
                elif isinstance(result, (int, float, long)):
                    yield TEXT, number_conv(result), pos
                elif hasattr(result, '__iter__'):
                    # Treat the iterable as a stream of its own and run it
                    # through the same filters before splicing it in
                    substream = _ensure(result)
                    for filter_ in filters:
                        substream = filter_(substream, ctxt, **vars)
                    for event in substream:
                        yield event
                else:
                    yield TEXT, unicode(result), pos

        else:
            yield kind, data, pos
def handle_starttag(self, tag, attrib):
    """Enqueue a START event for an opening tag.

    Attribute values are normalized before being passed through
    ``stripentities``: a minimized attribute (value ``None``) takes
    ``str(name)`` as its value, and a value that is not a ``str`` is
    decoded using ``self.encoding`` with the ``'replace'`` error handler.
    Tags listed in ``self._EMPTY_ELEMS`` get an END event immediately; all
    other tags are recorded in ``self._open_tags``.

    :param tag: the tag name
    :param attrib: iterable of ``(name, value)`` pairs
    """
    pairs = []
    for attr_name, raw in attrib:
        if raw is None:
            # Fixup minimized attributes
            text = str(attr_name)
        elif isinstance(raw, str):
            text = raw
        else:
            text = raw.decode(self.encoding, 'replace')
        pairs.append((QName(attr_name), stripentities(text)))

    self._enqueue(START, (QName(tag), Attrs(pairs)))
    if tag not in self._EMPTY_ELEMS:
        self._open_tags.append(tag)
    else:
        self._enqueue(END, QName(tag))
def test_nested_default_namespaces(self):
    """Serialize nested elements that re-declare the same default namespace.

    The default namespace is opened on the outer ``div`` and again around
    each nested ``p``; the expected output declares it only once, on the
    ``div``.
    """
    # NOTE(review): the whitespace in the TEXT events below has to agree
    # with the whitespace inside the expected literal -- confirm.
    stream = Stream([
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}div'), Attrs()), (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Redundant re-declaration around the first <p>
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Redundant re-declaration around the second <p>
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        (Stream.END, QName('http://example.org/}div'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1))
    ])
    output = stream.render(XMLSerializer, encoding=None)
    self.assertEqual("""<div xmlns="http://example.org/"> <p/> <p/> </div>""", output)
def test_nested_bound_namespaces(self):
    """Serialize nested elements that re-bind the same prefix ``x``.

    The prefix is bound on the outer ``div`` and again around each nested
    ``p``; the expected output declares ``xmlns:x`` only once, on the
    ``div``.
    """
    # NOTE(review): the whitespace in the TEXT events below has to agree
    # with the whitespace inside the expected literal -- confirm.
    stream = Stream([
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('div'), Attrs()), (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Redundant re-binding around the first <p>
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Redundant re-binding around the second <p>
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('p'), Attrs()), (None, -1, -1)),
        (Stream.END, QName('p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        (Stream.END, QName('div'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1))
    ])
    output = stream.render(XHTMLSerializer)
    self.assertEqual(
        """<div xmlns:x="http://example.org/"> <p></p> <p></p> </div>""", output)
def GenshiAdapter(tree):
    """Generate Genshi stream events from the "dom" treewalker over ``tree``.

    Consecutive character tokens are concatenated and emitted as one TEXT
    event as soon as a token of another type is seen (or the walker is
    exhausted).  Element and attribute names are written as
    ``{namespace}name`` when a namespace is present.

    :param tree: the tree handed to the treewalker
    :return: generator of ``(kind, data, pos)`` events
    """
    nowhere = (None, -1, -1)

    def element_name(token):
        # ``{namespace}name`` when the token has a truthy namespace
        namespace = token["namespace"]
        if namespace:
            return "{%s}%s" % (namespace, token["name"])
        return token["name"]

    def attribute_name(key):
        # ``key`` is a (namespace, name) pair; namespace may be None
        if key[0] is not None:
            return QName("{%s}%s" % key)
        return QName(key[1])

    buffered = None
    for token in treewalkers.getTreeWalker("dom")(tree):
        kind = token["type"]
        if kind in ("Characters", "SpaceCharacters"):
            if buffered is None:
                buffered = token["data"]
            else:
                buffered += token["data"]
        elif buffered is not None:
            yield TEXT, buffered, nowhere
            buffered = None

        if kind in ("StartTag", "EmptyTag"):
            attrs = Attrs([
                (attribute_name(key), value)
                for key, value in token["data"].items()
            ])
            yield (START, (QName(element_name(token)), attrs), nowhere)
            if kind == "EmptyTag":
                # An empty tag is closed right away by the branch below
                kind = "EndTag"

        if kind == "EndTag":
            yield END, QName(element_name(token)), nowhere
        elif kind == "Comment":
            yield COMMENT, token["data"], nowhere
        elif kind == "Doctype":
            doctype = (token["name"], token["publicId"], token["systemId"])
            yield DOCTYPE, doctype, nowhere
        # FIXME: What to do with any other token type?  Ignored for now.

    if buffered is not None:
        yield TEXT, buffered, nowhere
def GenshiAdapter(tree):
    """Generate Genshi stream events from the 'dom' treewalker over ``tree``.

    Runs of character tokens are merged into a single TEXT event, which is
    flushed when a token of another type arrives or the walker ends.
    Names are written as ``{namespace}name`` when a namespace is present.

    :param tree: the tree handed to the treewalker
    :return: generator of ``(kind, data, pos)`` events
    """
    no_position = (None, -1, -1)
    run = None
    for token in treewalkers.getTreeWalker('dom')(tree):
        token_kind = token['type']
        if token_kind in ('Characters', 'SpaceCharacters'):
            run = token['data'] if run is None else run + token['data']
        elif run is not None:
            yield TEXT, run, no_position
            run = None

        opens_element = token_kind in ('StartTag', 'EmptyTag')
        if opens_element:
            if token['namespace']:
                qualified = '{%s}%s' % (token['namespace'], token['name'])
            else:
                qualified = token['name']
            pairs = []
            for key, value in token['data'].items():
                # ``key`` is a (namespace, name) pair; namespace may be None
                if key[0] is not None:
                    pairs.append((QName('{%s}%s' % key), value))
                else:
                    pairs.append((QName(key[1]), value))
            yield (START, (QName(qualified), Attrs(pairs)), no_position)
            if token_kind == 'EmptyTag':
                # Fall through to the END branch so the element is closed
                token_kind = 'EndTag'

        if token_kind == 'EndTag':
            if token['namespace']:
                qualified = '{%s}%s' % (token['namespace'], token['name'])
            else:
                qualified = token['name']
            yield END, QName(qualified), no_position
        elif token_kind == 'Comment':
            yield COMMENT, token['data'], no_position
        elif token_kind == 'Doctype':
            yield DOCTYPE, (token['name'], token['publicId'],
                            token['systemId']), no_position
        # FIXME: What to do with any other token type?  Ignored for now.

    if run is not None:
        yield TEXT, run, no_position
def _interpolate_attrs(self, stream):
    """Run ``interpolate`` over the attribute values of START events.

    Every truthy attribute value is interpolated.  When the result is a
    single TEXT event the attribute keeps the plain text of that event;
    otherwise the attribute value becomes the list of resulting events.
    Falsy values and all non-START events pass through unchanged.

    :param stream: iterable of ``(kind, data, pos)`` events
    :return: generator of events
    """
    for kind, data, pos in stream:
        if kind is not START:
            yield kind, data, pos
            continue

        tag, attrs = data
        rebuilt = []
        for name, value in attrs:
            if value:
                parts = list(interpolate(value, self.filepath, pos[1],
                                         pos[2], lookup=self.lookup))
                if len(parts) == 1 and parts[0][0] is TEXT:
                    value = parts[0][1]
                else:
                    value = parts
            rebuilt.append((name, value))
        yield kind, (tag, Attrs(rebuilt)), pos
def __call__(self, stream):
    """Apply the filter to the given stream.

    Elements rejected by ``self.is_safe_elem`` are dropped together with
    everything up to their END event.  On the remaining elements only
    attributes in ``self.safe_attrs`` are kept; URI attributes must pass
    ``self.is_safe_uri`` and ``style`` attributes are reduced to the
    declarations returned by ``self.sanitize_css``.  COMMENT events are
    never passed on.

    :param stream: the markup event stream to filter
    :return: generator of the events that survive filtering
    """
    # Tag of the unsafe element currently being skipped, or None.
    # NOTE(review): nesting depth is not tracked, so the first END event
    # with the same tag ends the skipping.
    waiting_for = None

    for kind, data, pos in stream:
        if kind is START:
            if waiting_for:
                # Still inside an unsafe element: drop nested start tags
                continue
            tag, attrs = data
            if not self.is_safe_elem(tag, attrs):
                waiting_for = tag
                continue

            new_attrs = []
            for attr, value in attrs:
                # Entities are resolved before any of the checks below, so
                # the checks see the decoded value
                value = stripentities(value)
                if attr not in self.safe_attrs:
                    continue
                elif attr in self.uri_attrs:
                    # Don't allow URI schemes such as "javascript:"
                    if not self.is_safe_uri(value):
                        continue
                elif attr == 'style':
                    # Remove dangerous CSS declarations from inline styles
                    decls = self.sanitize_css(value)
                    if not decls:
                        continue
                    value = '; '.join(decls)
                new_attrs.append((attr, value))

            yield kind, (tag, Attrs(new_attrs)), pos

        elif kind is END:
            tag = data
            if waiting_for:
                # The END event of the skipped element is dropped as well
                if waiting_for == tag:
                    waiting_for = None
            else:
                yield kind, data, pos

        elif kind is not COMMENT:
            # Any other non-comment event passes unless it lies inside a
            # skipped element
            if not waiting_for:
                yield kind, data, pos
def block_process(self, events):
    """Dispatch every event to ``enter``, ``leave`` or ``append``.

    START events go to ``self.enter`` and END events to ``self.leave``.
    A TEXT event with non-blank data is wrapped in START/END events of the
    ``self._context`` element when a context is set.  Everything else is
    appended unchanged.

    :param events: iterable of ``(kind, data, pos)`` events
    """
    for kind, data, pos in events:
        if kind == START:
            self.enter(pos, *data)
        elif kind == END:
            self.leave(pos, data)
        elif kind == TEXT and self._context is not None and data.strip():
            wrapper = QName(self._context)
            self.append(START, (QName(wrapper), Attrs()), pos)
            self.append(kind, data, pos)
            self.append(END, wrapper, pos)
        else:
            self.append(kind, data, pos)
def ET(element):
    """Convert a given ElementTree element to a markup stream.

    The element is emitted as a START event, its text (if any), the
    streams of its children in document order, an END event and finally
    its tail text (if any).  Leading ``{`` characters are stripped from
    tag and attribute names before they are turned into ``QName``
    instances.

    :param element: an ElementTree element
    :return: a markup stream
    """
    tag_name = QName(element.tag.lstrip('{'))
    attrs = Attrs([(QName(attr.lstrip('{')), value)
                   for attr, value in element.items()])

    yield START, (tag_name, attrs), (None, -1, -1)
    if element.text:
        yield TEXT, element.text, (None, -1, -1)
    # list(element) gives the same snapshot of the children that
    # getchildren() did; getchildren() is deprecated and was removed from
    # ElementTree in Python 3.9.
    for child in list(element):
        for item in ET(child):
            yield item
    yield END, tag_name, (None, -1, -1)
    if element.tail:
        yield TEXT, element.tail, (None, -1, -1)
def filter_(stream):
    """Rewrite link targets and optionally insert a 1x1 image.

    The ``href`` of every ``a`` START event is replaced by the result of
    ``short(href, str(message))``.  When ``spy_pixel`` is truthy, an
    ``img`` element whose ``src`` is ``short(URL_OPEN, str(message))`` is
    emitted just before the END event of ``body``.  ``short``,
    ``message``, ``spy_pixel`` and ``URL_OPEN`` come from the enclosing
    scope.

    :param stream: iterable of ``(kind, data, pos)`` events
    :return: generator of events
    """
    for kind, data, pos in stream:
        if kind is START:
            tag, attrs = data
            if tag == 'a':
                target = short(attrs.get('href'), str(message))
                # Drop the old href first so only the new one remains
                attrs -= 'href'
                attrs |= [(QName('href'), target)]
                data = (tag, attrs)
        elif kind is END and data == 'body' and spy_pixel:
            pixel_attrs = Attrs([
                (QName('src'), short(URL_OPEN, str(message))),
                (QName('height'), '1'),
                (QName('width'), '1'),
            ])
            yield START, (QName('img'), pixel_attrs), pos
            yield END, QName('img'), pos
        yield kind, data, pos
def GenshiAdapter(tree):
    """Generate Genshi stream events from the "simpletree" treewalker.

    Consecutive character tokens are concatenated and emitted as one TEXT
    event as soon as a token of another type is seen (or the walker is
    exhausted).  Element names are written as ``{namespace}name`` when the
    token carries a namespace, for START and END events alike, so that
    the two names of an element always match.

    :param tree: the tree handed to the treewalker
    :return: generator of ``(kind, data, pos)`` events
    """

    def element_name(token):
        # .get() is used because only start/empty tag tokens are known to
        # carry the key here.
        # NOTE(review): assumes tokens are dict-like -- confirm.
        namespace = token.get(u"namespace")
        if namespace:
            return u"{%s}%s" % (namespace, token[u"name"])
        return token[u"name"]

    text = None
    for token in treewalkers.getTreeWalker(u"simpletree")(tree):
        token_type = token[u"type"]
        if token_type in (u"Characters", u"SpaceCharacters"):
            if text is None:
                text = token[u"data"]
            else:
                text += token[u"data"]
        elif text is not None:
            yield TEXT, text, (None, -1, -1)
            text = None

        if token_type in (u"StartTag", u"EmptyTag"):
            if token[u"namespace"]:
                name = u"{%s}%s" % (token[u"namespace"], token[u"name"])
            else:
                name = token[u"name"]
            attrs = Attrs([(QName(attr), value)
                           for attr, value in token[u"data"]])
            yield (START, (QName(name), attrs), (None, -1, -1))
            if token_type == u"EmptyTag":
                # An empty tag is closed right away by the branch below
                token_type = u"EndTag"

        if token_type == u"EndTag":
            # Qualify the END name the same way as the START name;
            # previously the namespace was dropped here, so the END event
            # of a namespaced element did not match its START event.
            yield END, QName(element_name(token)), (None, -1, -1)
        elif token_type == u"Comment":
            yield COMMENT, token[u"data"], (None, -1, -1)
        elif token_type == u"Doctype":
            yield DOCTYPE, (token[u"name"], token[u"publicId"],
                            token[u"systemId"]), (None, -1, -1)
        else:
            pass  # FIXME: What to do?

    if text is not None:
        yield TEXT, text, (None, -1, -1)
def GenshiAdapter(treewalker, tree):
    """Generator to convert html5lib treewalker tokens into Genshi
    stream tokens

    Runs of character tokens are merged into a single TEXT event.  Doctype
    tokens are emitted with only their name; the two remaining fields are
    ``None``.

    :param treewalker: callable returning an iterable of tokens for ``tree``
    :param tree: the tree to walk
    :return: generator of ``(kind, data, pos)`` events
    """
    no_position = (None, -1, -1)
    run = None
    for token in treewalker(tree):
        kind = token["type"]
        if kind in ("Characters", "SpaceCharacters"):
            run = token["data"] if run is None else run + token["data"]
        elif run is not None:
            # NOTE(review): type() never returns None, so only ``unicode``
            # can satisfy this check.
            assert type(run) in (unicode, None)
            yield TEXT, run, no_position
            run = None

        if kind in ("StartTag", "EmptyTag"):
            pairs = [(QName(attr), value) for attr, value in token["data"]]
            yield (START, (QName(token["name"]), Attrs(pairs)), no_position)
            if kind == "EmptyTag":
                # Fall through to the END branch so the element is closed
                kind = "EndTag"

        if kind == "EndTag":
            yield END, QName(token["name"]), no_position
        elif kind == "Comment":
            yield COMMENT, token["data"], no_position
        elif kind == "Doctype":
            yield DOCTYPE, (token["name"], None, None), no_position
        # FIXME: What to do with any other token type?  Ignored for now.

    if run is not None:
        yield TEXT, run, no_position
def close_diff(self):
    """
    Close the piece of HTML currently marked as a diff.

    When every buffered event came from the same operation, only one kind
    of operation happened and the regular diff tag is rendered.  When the
    operations differ, the diffed element also contains original content,
    so the change is treated as a change in formatting.

    Diffed sections are rendered lazily: the opening diff tag, the
    buffered events and the closing tag are put into the result stream
    here, after which the buffer is emptied.
    """
    # First buffered event: the text node or tag that the diff wraps.
    # Read up front, so an empty buffer fails before anything is rendered.
    first_event = self._buffer[0]
    if not self._all_same:
        # A text node is not valid here: text is diffed word by word, so
        # a partially changed run cannot start with a text node (the diff
        # iterator would have reported an equal action and ended the run).
        assert first_event[0] == START
        start_data = first_event[1]
        formatting_node = DOMNode(name=start_data[0], attrs=start_data[1])
        render = getattr(DefaultDiffProducer,
                         'render_formatting_%s' % self.operation)
        node = render(self.get_current_element(), formatting_node)
    else:
        # All events were the same operation
        render = getattr(DefaultDiffProducer, 'render_%s' % self.operation)
        node = render(self.get_current_element())

    logger.debug("Lazy diff '%s' of %d nodes marked using %r" %
                 (self.operation, len(self._buffer), node))

    opening = (START, (QName(node.name), Attrs(node.attrs)), None)
    self._result.append(opening)
    self._result.extend(self._buffer)
    closing = (END, QName(node.name), None)
    self._result.append(closing)

    self._rendered = False
    self._buffer = []