Exemple #1
0
 def test_empty_page(self):
     """An empty document is expected to yield no links and empty text."""
     empty_doc = HtmlParser("")

     # Links are queried before text, in the same order as always.
     found_links = empty_doc.get_links()
     self.assertEqual(found_links, [])

     extracted = empty_doc.get_text()
     self.assertEqual(extracted, '')
Exemple #2
0
 def test_img_alt(self):
     """The ``alt`` attribute of an <img> is expected to appear as text."""
     markup = "<img alt='some text'></img>"
     extracted = HtmlParser(markup).get_text()
     self.assertEqual(extracted, "some text")
Exemple #3
0
 def test_script(self):
     """Inline <script> content is expected to contribute no text."""
     markup = "<script type='text/javascript'>var a = 1</script>"
     script_only = HtmlParser(markup)
     self.assertEqual(script_only.get_text(), '')
Exemple #4
0
 def test_numeric_character_reference(self):
     """Numeric character references are expected to decode to U+03A3.

     The input spells the same code point five ways: decimal (``&#931;``),
     zero-padded decimal (``&#0931;``), upper-case hex (``&#x3A3;``),
     zero-padded hex (``&#x03A3;``) and lower-case hex (``&#x3a3;``).
     931 == 0x3A3, i.e. GREEK CAPITAL LETTER SIGMA.
     """
     parser = HtmlParser(
         "&#931;&#0931;&#x3A3;&#x03A3;&#x3a3;"
     )
     # The expected value is written as an escape rather than a raw
     # non-ASCII literal: the previous literal had been corrupted into
     # "¦²" pairs (apparently sigma's GBK bytes A6 B2 read as Latin-1),
     # which no correct decoder could ever produce for this input.
     expected = u"\u03a3" * 5
     self.assertEqual(parser.get_text(), expected)
Exemple #5
0
 def test_character_entity_reference(self):
     """Named entities for ``&``, ``<`` and ``>`` are expected to decode."""
     encoded = "&amp;&lt;&gt;"
     decoded = HtmlParser(encoded).get_text()
     self.assertEqual(decoded, "&<>")
Exemple #6
0
 def test_new_line_test(self):
     """Text from two sibling <div> blocks is expected on separate lines."""
     markup = "<div><p>some text</p></div><div><a>another text</a></div>"
     two_blocks = HtmlParser(markup)
     # A single "\n" should separate the text of the two blocks.
     self.assertEqual(two_blocks.get_text(), "some text\nanother text")
Exemple #7
0
 def test_text_no_tags(self):
     """Plain input without markup is expected to come back unchanged."""
     plain = "some text"
     extracted = HtmlParser(plain).get_text()
     self.assertEqual(extracted, "some text")