Ejemplo n.º 1
0
 def test_render_html(self):
     """Compare render_html() output for its default, unicode and cp1251 calls."""
     source = u'<html><body><p>фыва</p></body></html>'
     doc = fromstring(source)

     # The default call is expected to equal the UTF-8 encoded markup.
     expected_utf8 = source.encode('utf-8')
     self.assertEqual(expected_utf8, render_html(doc))

     # With make_unicode=True the original text is expected back.
     self.assertEqual(source, render_html(doc, make_unicode=True))

     # With encoding='cp1251' the cp1251-encoded markup is expected.
     expected_cp1251 = source.encode('cp1251')
     self.assertEqual(expected_cp1251, render_html(doc, encoding='cp1251'))
Ejemplo n.º 2
0
    def test_replace_node_with_text(self):
        """Check the rendered markup after replace_node_with_text() calls.

        Four cases are covered: a plain span, a span followed by tail text,
        a p that is the only child of its parent div, and a span whose
        preceding sibling carries tail text.

        assertEqual is used instead of assertTrue(a == b) so that a failing
        case reports both values rather than only "False is not true".
        """
        # replace span
        markup = """
            <div><p><span>span</span><a href="#">link</a></p></div>
        """
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            '<div><p>FOO<a href="#">link</a></p></div>')

        # replace span and keep its tail
        markup = """
            <div><p><span>span</span>BAR<a href="#">link</a></p></div>
        """
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            '<div><p>FOOBAR<a href="#">link</a></p></div>')

        # replace p which is the only child of its parent div
        markup = """
            <div><p><span>span</span>BAR<a href="#">link</a></p></div>
        """
        tree = fromstring(markup)
        replace_node_with_text(tree, './/p', 'FOO')
        self.assertEqual(render_html(tree), '<div>FOO</div>')

        # replace span and keep the tail of its preceding sibling element
        markup = """
            <div><p><strong>str</strong>!<span>span</span>BAR<a href="#">link</a></p></div>
        """
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            '<div><p><strong>str</strong>!FOOBAR<a href="#">link</a></p></div>')
Ejemplo n.º 3
0
    def test_replace_node_with_text(self):
        """Check the rendered bytes after replace_node_with_text() calls.

        Four cases are covered: a plain span, a span followed by tail text,
        a p that is the only child of its parent div, and a span whose
        preceding sibling carries tail text.

        assertEqual is used instead of assertTrue(a == b) so that a failing
        case reports both values rather than only "False is not true".
        """
        # replace span
        markup = """
            <div><p><span>span</span><a href="#">link</a></p></div>"""
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            b'<div><p>FOO<a href="#">link</a></p></div>')

        # replace span and keep its tail
        markup = """
            <div><p><span>span</span>BAR<a href="#">link</a></p></div>"""
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            b'<div><p>FOOBAR<a href="#">link</a></p></div>')

        # replace p which is the only child of its parent div
        markup = """
            <div><p><span>span</span>BAR<a href="#">link</a></p></div>"""
        tree = fromstring(markup)
        replace_node_with_text(tree, './/p', 'FOO')
        self.assertEqual(render_html(tree), b'<div>FOO</div>')

        # replace span and keep the tail of its preceding sibling element
        markup = """
            <div><p><strong>str</strong>!<span>span</span>BAR<a href="#">link</a></p></div>"""
        tree = fromstring(markup)
        replace_node_with_text(tree, './/span', 'FOO')
        self.assertEqual(
            render_html(tree),
            b'<div><p><strong>str</strong>!FOOBAR<a href="#">link</a></p></div>')
Ejemplo n.º 4
0
 def test_render_html(self):
     """Exercise render_html() with no options, make_unicode and an encoding."""
     text = u'<html><body><p>фыва</p></body></html>'
     utf8_bytes = text.encode('utf-8')
     parsed = fromstring(text)

     # No options: the result is compared with the UTF-8 encoded markup.
     default_result = render_html(parsed)
     self.assertEqual(utf8_bytes, default_result)

     # make_unicode=True: the result is compared with the original text.
     unicode_result = render_html(parsed, make_unicode=True)
     self.assertEqual(text, unicode_result)

     # encoding='cp1251': the result is compared with cp1251-encoded markup.
     cp1251_bytes = text.encode('cp1251')
     self.assertEqual(cp1251_bytes, render_html(parsed, encoding='cp1251'))
Ejemplo n.º 5
0
    def test_drop_node(self):
        """Check the rendered bytes after drop_node() with and without keep_content.

        assertEqual is used instead of assertTrue(a == b) so that a failing
        case reports both values rather than only "False is not true".
        """
        markup = """
            <div><p>text<span>span</span><a href="#">link</a></p>tail</div>"""

        # Dropping p: only the div with the text "tail" is expected to remain.
        tree = fromstring(markup)
        drop_node(tree, './/p')
        self.assertEqual(render_html(tree), b'<div>tail</div>')

        # Dropping span with keep_content=True: its text is expected to stay
        # in place inside p.
        tree = fromstring(markup)
        drop_node(tree, './/span', keep_content=True)
        self.assertEqual(
            render_html(tree),
            b'<div><p>textspan<a href="#">link</a></p>tail</div>')
Ejemplo n.º 6
0
    def test_drop_node(self):
        """Check the rendered markup after drop_node() with and without keep_content.

        assertEqual is used instead of assertTrue(a == b) so that a failing
        case reports both values rather than only "False is not true".
        """
        markup = """
            <div><p>text<span>span</span><a href="#">link</a></p>tail</div>
        """

        # Dropping p: only the div with the text "tail" is expected to remain.
        tree = fromstring(markup)
        drop_node(tree, './/p')
        self.assertEqual(render_html(tree), '<div>tail</div>')

        # Dropping span with keep_content=True: its text is expected to stay
        # in place inside p.
        tree = fromstring(markup)
        drop_node(tree, './/span', keep_content=True)
        self.assertEqual(
            render_html(tree),
            '<div><p>textspan<a href="#">link</a></p>tail</div>')
Ejemplo n.º 7
0
 def parse_project_description(self, root):
     """Return the description text extracted from ``root``.

     A newline is appended to the tail of every element matched by the
     ``br`` XPath, then ``root`` is rendered with ``encoding="unicode"`` and
     passed through decode_entities() and strip_tags(). Finally the text
     is cut at "Posted On" and then at "Budget :", stripping surrounding
     whitespace after each cut.
     """
     # NOTE(review): "//br" is an absolute XPath; with lxml it would match
     # every br in the whole document, not only those below root - confirm
     # this is intended.
     for br in root.xpath("//br"):
         existing_tail = br.tail or ""
         br.tail = existing_tail + "\n"

     rendered = render_html(root, encoding="unicode")
     decoded = decode_entities(rendered)
     text = strip_tags(decoded, normalize_space=False)

     # Keep only what precedes each marker, in this order.
     for marker in (u"Posted On", u"Budget :"):
         text = text.split(marker)[0].strip()
     return text
Ejemplo n.º 8
0
 def parse_project_description(self, root):
     """Return the description text extracted from ``root``.

     A newline is appended to the tail of every element matched by the
     ``br`` XPath, then ``root`` is rendered with ``encoding='unicode'`` and
     passed through decode_entities() and strip_tags(). The text before
     the first 'Category:' marker is returned with surrounding whitespace
     stripped.
     """
     # NOTE(review): '//br' is an absolute XPath; with lxml it would match
     # every br in the whole document, not only those below root - confirm
     # this is intended.
     for br in root.xpath('//br'):
         existing_tail = br.tail or ''
         br.tail = existing_tail + '\n'

     rendered = render_html(root, encoding='unicode')
     decoded = decode_entities(rendered)
     stripped = strip_tags(decoded, normalize_space=False)

     before_category = stripped.split(u'Category:')[0]
     return before_category.strip()
Ejemplo n.º 9
0
 def html(self, encoding='unicode'):
     """Return render_html() applied to ``self.node``.

     ``encoding`` is forwarded unchanged as the ``encoding`` keyword and
     defaults to ``'unicode'``.
     """
     node = self.node
     return render_html(node, encoding=encoding)
Ejemplo n.º 10
0
 def html(self, encoding='unicode'):
     """Render ``self.node`` through render_html() and return the result.

     The ``encoding`` argument (default ``'unicode'``) is passed straight
     through to render_html().
     """
     rendered = render_html(self.node, encoding=encoding)
     return rendered