def setUp(self):
    """Prepare the fixtures shared by the parser tests.

    Reads ``query_result.html`` from the directory containing this file,
    stores its UTF-8 decoded text in ``self.contents`` and stores a new
    ``FJUDHTMLParser`` instance in ``self.parser``.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))
    sample = os.path.join(test_dir, "query_result.html")
    # Use open() rather than the Python 2-only file() builtin, and read
    # raw bytes so the explicit UTF-8 decode is valid on Python 2 and 3.
    with open(sample, "rb") as _fp:
        self.contents = _fp.read().decode("utf-8")
    self.parser = FJUDHTMLParser()
class HTMLParserTest(unittest.TestCase):
    """Tests for ``FJUDHTMLParser``.

    The tests feed the parser either a saved sample page
    (``query_result.html``, stored beside this file) or a short inline
    HTML snippet, then check the URL dictionary and next-page accessors.
    """

    def setUp(self):
        """Load the sample page into ``self.contents`` and build a parser."""
        test_dir = os.path.dirname(os.path.abspath(__file__))
        sample = os.path.join(test_dir, "query_result.html")
        # Use open() rather than the Python 2-only file() builtin, and read
        # raw bytes so the explicit UTF-8 decode is valid on Python 2 and 3.
        with open(sample, "rb") as _fp:
            self.contents = _fp.read().decode("utf-8")
        self.parser = FJUDHTMLParser()

    def test_parse_sn_and_url(self):
        """The sample page yields 20 entries, including two known keys."""
        self.parser.feed(self.contents)
        urls = self.parser.get_url_dict()
        self.assertEqual(20, len(urls))
        # assertIn reports the missing key and the container on failure.
        self.assertIn(u"104,台抗,661", urls)
        self.assertIn(u"104,台上,2308", urls)

    def test_next_page_bad_input(self):
        """Markup without a next-page link reports no next page."""
        self.parser.feed('<img src="python-logo.png" alt="The Python logo">')
        self.assertFalse(self.parser.has_next_page())
        self.assertEqual("", self.parser.get_next_page_url())

    def test_next_page_trivial(self):
        """A lone anchor whose text is "下一頁" ("next page") is detected."""
        # A plain u'' literal: the original raw prefix was unnecessary (no
        # backslashes) and the combined ur'' prefix is Python 2-only syntax.
        self.parser.feed(u'<a href="http://foo">下一頁</a>')
        self.assertTrue(self.parser.has_next_page())
        self.assertEqual("http://foo", self.parser.get_next_page_url())

    def test_next_page_standard(self):
        """The saved sample page exposes a non-empty next-page URL."""
        self.parser.feed(self.contents)
        self.assertTrue(self.parser.has_next_page())
        self.assertNotEqual("", self.parser.get_next_page_url())