def test_05_parse_page(self):
    """Fetch the xkcd 292 page and sanity-check link and content extraction.

    Retrieves the page through ``mod_net.get_web_page`` (presumably a live
    HTTP request -- confirm network access is available where this runs),
    then checks that the raw text is non-trivial in size, that more than
    five links are extracted, and that the extracted content ends with the
    expected trailing text.
    """
    url = 'http://xkcd.com/292'
    raw_text = mod_net.get_web_page(url)
    # Second argument is passed as an empty string; its meaning is defined
    # by mod_html.extract_page_links (not visible here).
    links = mod_html.extract_page_links(raw_text, '')

    # assertGreater reports the actual value on failure, unlike
    # assertEqual(<bool expression>, True), which only says "False != True".
    self.assertGreater(len(raw_text), 1000)
    self.assertGreater(len(links), 5)

    content = mod_html.extract_content(raw_text).strip()
    # NOTE(review): the leading-text check below was already disabled in the
    # original test; the reason is not recorded.
    # self.assertEqual(content[0:10], 'xkcd: goto')
    self.assertEqual(content[-13:], 'More details.')
def test_05_parse_page(self):
    """Fetch the xkcd 292 page and sanity-check link and content extraction.

    Retrieves the page through ``mod_net.get_web_page`` (presumably a live
    HTTP request -- confirm network access is available where this runs),
    then checks that the raw text is non-trivial in size, that more than
    five links are extracted, and that the extracted content ends with the
    expected trailing text.
    """
    url = 'http://xkcd.com/292'
    raw_text = mod_net.get_web_page(url)
    # Second argument is passed as an empty string; its meaning is defined
    # by mod_html.extract_page_links (not visible here).
    links = mod_html.extract_page_links(raw_text, '')

    # assertGreater reports the actual value on failure, unlike
    # assertEqual(<bool expression>, True), which only says "False != True".
    self.assertGreater(len(raw_text), 1000)
    self.assertGreater(len(links), 5)

    content = mod_html.extract_content(raw_text).strip()
    # NOTE(review): the leading-text check below was already disabled in the
    # original test; the reason is not recorded.
    # self.assertEqual(content[0:10], 'xkcd: goto')
    self.assertEqual(content[-13:], 'More details.')
def test_04_extract_content_b(self):
    """Check extract_content on text containing an anchor tag.

    The expected result keeps the link's visible text and drops the
    surrounding ``<a ...>`` markup.
    """
    markup = 'this is some test, and <a href=http://xkcd.com/292>this is a link</a>'
    expected = 'this is some test, and this is a link'
    extracted = mod_html.extract_content(markup)
    self.assertEqual(extracted, expected)
def test_03_extract_content_a(self):
    """Check extract_content on a small HTML document with no links.

    The expected result is the text of the heading and the body run
    together, with no separator where the ``<BR>`` tag was.
    """
    markup = '<html><body><H1>this is some html</H1><BR>but there are no links</body></html>'
    expected = 'this is some htmlbut there are no links'
    extracted = mod_html.extract_content(markup)
    self.assertEqual(extracted, expected)