def test_parse(self):
     # Parsing a lone paragraph: the first returned root must carry the
     # paragraph's text.
     parsed = HTML5Strip.parse("<p>hello, world</p>")
     first_root = parsed[0]
     self.assertEqual(first_root.text, "hello, world")
 def test_get_blacklist(self):
     # A fragment built from "<p>hello, world<br></p>" is expected to
     # blacklist exactly the <br> children of its root.
     paragraph = HTML5Strip.parse("<p>hello, world<br></p>")[0]
     fragment = HTML5Fragment(paragraph)
     actual = fragment.blacklist
     expected = set(paragraph.findall("br"))
     self.assertEqual(actual, expected)
    def test_clean(self):
        # Ordered table of (markup, expected) pairs; each markup is passed to
        # HTML5Strip.strip and the result compared with the expected string.
        cases = [
            # empty nested elements in different positions relative to text
            ("<a> X <b><c></c> X </b></a>", "<a> X <b><c></c> X </b></a>"),
            ("<a> X <b><c></c></b> X </a>", "<a> X <b><c></c></b> X </a>"),
            ("<a><b><c></c></b> X </a>", "<a> X </a>"),
            ("<a> X <b><c></c></b></a>", "<a> X </a>"),

            # trailing empty <div>, outer whitespace, leading/trailing <br>
            ("<div> X <div> X <div></div></div></div>", "<div> X <div> X </div></div>"),
            (" <p>hello, world</p> ", "<p>hello, world</p>"),
            ("<p>hello, world<br></p>", "<p>hello, world</p>"),
            ("<br><p>hello, world</p><br>", "<p>hello, world</p>"),

            # deeper nesting and whitespace-only content
            ("<a><b><c><d></d></c> X </b></a>", "<a><b> X </b></a>"),
            ("<a> <b> X </b> </a>", "<a> <b> X </b> </a>"),
            ("<a> <b> <c> <d> <e>X</e> </d> </c> </b> </a>", "<a> <b> <c> <d> <e>X</e> </d> </c> </b> </a>"),
            ("<a> <x></x> <b> X </b> </a>", "<a>  <b> X </b> </a>"),  # preserve spaces
            ("<a> </a> <b> </b> <c> X </c> <b> </b> <a> </a>", "<c> X </c>"),
            # NOTE(review): this input ends with a "</a>" that has no matching
            # opening tag - confirm whether the malformed markup is intended.
            ("<a> </a> <b> </b> <c> </c> <b> </b> </a>", ""),

            # <br> runs between, before and after text inside one paragraph
            ("<p>a<br><br>b</p>", "<p>a<br><br>b</p>"),
            ("<p><br><br>b</p>", "<p>b</p>"),
            ("<p>b<br><br></p>", "<p>b</p>"),

            # <br> runs mixed with a child element that holds text
            ("<a><b> x </b><br><br> c </a>", "<a><b> x </b><br><br> c </a>"),
            ("<a><br><br><b> x </b><br><br> c </a>", "<a><b> x </b><br><br> c </a>"),
            ("<a> a <br><br><b> x </b></a>", "<a> a <br><br><b> x </b></a>"),

            # bare top-level text next to an empty element
            ("<a></a>x", "x"),
            ("x<a></a>", "x"),
            ("x<a></a>y", "xy"),

            # sequences of sibling paragraphs, some empty, some with <br>
            ("<p>a</p> <p></p> <p></p> <p>a<br></p>", "<p>a</p> <p></p> <p></p> <p>a</p>"),
            ("<p></p> <p><br>a</p> <p></p> <p>a<br></p> <p></p>", "<p>a</p> <p></p> <p>a</p>"),
            ("<p></p> <p><br>a</p> <p><br>a</p> <p></p> <p>a</p>", "<p>a</p> <p><br>a</p> <p></p> <p>a</p>"),
            ("<p></p> <p>a</p> <p></p> <p>a<br></p> <p>a<br></p>", "<p>a</p> <p></p> <p>a<br></p> <p>a</p>"),
        ]
        for markup, expected in cases:
            self.assertEqual(HTML5Strip.strip(markup), expected)
 def test_has_no_text_on_html(self):
     # The paragraph body is the single character U+2000; has_text must
     # report it as not being text.
     paragraph = HTML5Strip.parse(u"<p>\u2000</p>")[0]
     body = paragraph.text
     self.assertFalse(HTML5Fragment.has_text(body))
 def test_has_text_on_html(self):
     # An ordinary sentence inside a paragraph must be recognised as text.
     paragraph = HTML5Strip.parse(u"<p>hello, world</p>")[0]
     body = paragraph.text
     self.assertTrue(HTML5Fragment.has_text(body))
 def test_strip_tail(self):
     # Stripping the <br> elements must keep the text that follows them
     # ("world") in the serialised result.
     parsed = HTML5Strip.parse("<p>hello,<br>world</p>")
     fragment = HTML5Fragment(parsed[0])
     br_nodes = set(fragment.root.findall("br"))
     stripped = fragment.strip(br_nodes)
     self.assertEqual(stripped, "<p>hello,world</p>")
 def test_strip_root(self):
     # Passing the root element itself as the set to strip must yield an
     # empty string.
     parsed = HTML5Strip.parse("<p>hello, world</p>")
     fragment = HTML5Fragment(parsed[0])
     stripped = fragment.strip({fragment.root})
     self.assertEqual(stripped, "")
 def test_parse_fragments(self):
     # Two sibling paragraphs must come back as two roots, in document
     # order, each carrying its own text.
     parsed = HTML5Strip.parse("<p>hello</p><p>world</p>")
     texts = [node.text for node in parsed]
     self.assertEqual(texts, ["hello", "world"])