def test_empty_page(self):
    """An empty document produces an empty link list and empty text."""
    empty_page = HtmlParser("")

    links = empty_page.get_links()
    self.assertEqual(links, [])

    text = empty_page.get_text()
    self.assertEqual(text, "")
def test_img_alt(self):
    """The ``alt`` attribute of an ``<img>`` tag is reported as text."""
    markup = "<img alt='some text'></img>"
    extracted = HtmlParser(markup).get_text()
    self.assertEqual(extracted, "some text")
def test_script(self):
    """The body of a ``<script>`` element contributes no text."""
    markup = "<script type='text/javascript'>var a = 1</script>"
    extracted = HtmlParser(markup).get_text()
    self.assertEqual(extracted, "")
def test_numeric_character_reference(self):
    """Numeric character references are decoded to the characters they name.

    ``&#931;`` is the decimal reference for U+03A3 (GREEK CAPITAL LETTER
    SIGMA), so five references are expected back as five sigmas.
    """
    # The markup must carry the literal ``&#931;`` references; feeding the
    # parser an already-decoded sigma would not exercise reference handling.
    markup = "&#931;&#931;&#931;&#931;&#931;"
    parser = HtmlParser(markup)
    # NOTE(review): the previous expected value was the GBK byte pair for
    # sigma (A6 B2) misread as Latin-1; confirm the file encoding allows a
    # literal sigma here.
    self.assertEqual(parser.get_text(), "ΣΣΣΣΣ")
def test_character_entity_reference(self):
    """Named character entity references are decoded in the extracted text.

    ``&amp;``, ``&lt;`` and ``&gt;`` are expected to come back as the
    characters ``&``, ``<`` and ``>``.
    """
    # The input has to contain the entity references themselves; raw
    # ``&<>`` would not test entity decoding (and a bare ``<`` could be
    # taken for the start of a tag).
    markup = "&amp;&lt;&gt;"
    parser = HtmlParser(markup)
    self.assertEqual(parser.get_text(), "&<>")
def test_new_line_test(self):
    """Text from two sibling ``<div>`` blocks is separated by a newline."""
    markup = "<div><p>some text</p></div><div><a>another text</a></div>"
    expected = "some text\nanother text"
    extracted = HtmlParser(markup).get_text()
    self.assertEqual(extracted, expected)
def test_text_no_tags(self):
    """Input without any markup is returned unchanged as text."""
    plain = "some text"
    extracted = HtmlParser(plain).get_text()
    self.assertEqual(extracted, "some text")