def test_replace_node_with_text(self):
    """Check replace_node_with_text against several HTML fragments.

    Every case parses a fragment, replaces the node selected by the
    XPath expression with the text 'FOO' and compares the UTF-8
    rendering of the tree with the expected bytes.
    """
    cases = (
        # A span is replaced by the text.
        (""" <div><p><span>span</span><a href="#">link</a></p></div>""",
         './/span',
         b'<div><p>FOO<a href="#">link</a></p></div>'),
        # A span is replaced and its tail text ("BAR") is kept.
        (""" <div><p><span>span</span>BAR<a href="#">link</a></p></div>""",
         './/span',
         b'<div><p>FOOBAR<a href="#">link</a></p></div>'),
        # The p element, the only child of the div, is replaced.
        (""" <div><p><span>span</span>BAR<a href="#">link</a></p></div>""",
         './/p',
         b'<div>FOO</div>'),
        # A span is replaced and the tail ("!") of its preceding sibling
        # element is kept.
        (""" <div><p><strong>str</strong>!<span>span</span>BAR<a href="#">link</a></p></div>""",
         './/span',
         b'<div><p><strong>str</strong>'
         b'!FOOBAR<a href="#">link</a></p></div>'),
    )
    for markup, xpath, expected in cases:
        root = fromstring(markup)
        replace_node_with_text(root, xpath, 'FOO')
        self.assertEqual(render_html(root, encoding='utf-8'), expected)
def test_replace_node_with_text(self):
    """Exercise replace_node_with_text on several small HTML fragments."""

    def check(markup, xpath, expected):
        # Parse the fragment, replace the matched node with the text
        # 'FOO' and compare the UTF-8 rendering with the expected bytes.
        root = fromstring(markup)
        replace_node_with_text(root, xpath, 'FOO')
        self.assertEqual(render_html(root, encoding='utf-8'), expected)

    # Replace a span.
    check(
        """ <div><p><span>span</span><a href="#">link</a></p></div>""",
        './/span',
        b'<div><p>FOO<a href="#">link</a></p></div>',
    )

    # Replace a span and keep its tail text.
    check(
        """ <div><p><span>span</span>BAR<a href="#">link</a></p></div>""",
        './/span',
        b'<div><p>FOOBAR<a href="#">link</a></p></div>',
    )

    # Replace the p element, which is the only child of its parent div.
    check(
        """ <div><p><span>span</span>BAR<a href="#">link</a></p></div>""",
        './/p',
        b'<div>FOO</div>',
    )

    # Replace a span and keep the tail of its preceding sibling element.
    check(
        """ <div><p><strong>str</strong>!<span>span</span>BAR<a href="#">link</a></p></div>""",
        './/span',
        b'<div><p><strong>str</strong>'
        b'!FOOBAR<a href="#">link</a></p></div>',
    )
def test_render_html(self):
    """Check render_html output as text and in two byte encodings."""
    source = u'<html><body><p>фыва</p></body></html>'
    tree = fromstring(source)

    # With no encoding argument the rendering equals the source text.
    self.assertEqual(source, render_html(tree))

    # With an explicit encoding the rendering equals the source text
    # encoded with that codec.
    for codec in ('utf-8', 'cp1251'):
        self.assertEqual(source.encode(codec),
                         render_html(tree, encoding=codec))
def test_drop_node(self):
    """Check drop_node with and without the keep_content option.

    The same fragment is parsed afresh for every case, so the cases do
    not affect each other.
    """
    markup = """ <div><p>text<span>span</span><a href="#">link</a></p>tail</div>"""
    cases = (
        # Dropping the p element leaves only the text that followed it.
        ('.//p', {}, b'<div>tail</div>'),
        # Dropping the span with keep_content=True leaves its text in
        # place.
        ('.//span', {'keep_content': True},
         b'<div><p>textspan<a href="#">link</a></p>tail</div>'),
    )
    for xpath, options, expected in cases:
        root = fromstring(markup)
        drop_node(root, xpath, **options)
        self.assertEqual(render_html(root, encoding='utf-8'), expected)
def test_drop_node(self):
    """Exercise drop_node on one fragment, parsed afresh for each call."""
    markup = """ <div><p>text<span>span</span><a href="#">link</a></p>tail</div>"""

    def rendered(xpath, **options):
        # Parse the fragment, drop the matched node and return the
        # UTF-8 rendering of the resulting tree.
        root = fromstring(markup)
        drop_node(root, xpath, **options)
        return render_html(root, encoding='utf-8')

    # Without keep_content the p element goes away with everything
    # inside it; the text after it stays.
    self.assertEqual(rendered('.//p'), b'<div>tail</div>')

    # With keep_content=True the span tags go away and the span text
    # stays.
    self.assertEqual(
        rendered('.//span', keep_content=True),
        b'<div><p>textspan<a href="#">link</a></p>tail</div>')
def html(self, encoding='unicode'):
    """Return the HTML representation of the selected node.

    For a text node the value of ``self.node()`` is returned unchanged
    and ``encoding`` is not used. Any other node is passed to
    ``render_html`` together with ``encoding``.
    """
    if not self.is_text_node():
        return render_html(self.node(), encoding=encoding)
    # Text nodes are returned as they are, without rendering.
    return self.node()