def test_extract_link(self):
    """Check ``extract_link`` against the Zhihu home page and a topic page.

    The home page is expected to yield no links, while the topic page is
    expected to yield more than five.

    NOTE(review): ``decode_url`` presumably fetches the page over the
    network, so the outcome depends on the live site content -- confirm.
    """
    html = decode_url('http://www.zhihu.com')
    links = extract_link(html)
    self.assertEqual(links, [])

    html = decode_url('http://www.zhihu.com/topic/19554298')
    links = extract_link(html)
    # assertGreater reports the actual count on failure, unlike
    # assertTrue(len(links) > 5), which only says "False is not true".
    self.assertGreater(len(links), 5)
def test_rank_page(self):
    """Check that ``rank_page`` scores the "hot" question above the "cold" one.

    NOTE(review): which question counts as hot or cold is taken from the
    variable names only; the pages are presumably fetched live by
    ``decode_url``, so the ordering may drift over time -- confirm.
    """
    hot_page = decode_url('http://www.zhihu.com/question/28676107')
    cold_page = decode_url('http://www.zhihu.com/question/19555512')
    # assertGreater shows both rank values when the comparison fails.
    self.assertGreater(rank_page(hot_page), rank_page(cold_page))
def test_extract_title(self):
    """Check that the title extracted from the Zhihu home page is '知乎'."""
    home_page = decode_url('http://www.zhihu.com')
    self.assertEqual(extract_title(home_page), '知乎')
def test_decode_url(self):
    """Check ``decode_url`` on a reachable page and on an unreachable host.

    The first 1000 characters of the decoded Zhihu home page must contain
    '知乎'. Fetching ``https://www.google.com`` must raise
    ``urllib.error.URLError``.
    """
    html = decode_url('http://www.zhihu.com')
    # assertIn prints the searched text on failure, which assertTrue does not.
    self.assertIn('知乎', html[:1000])

    # Unit test with Chinese characteristics
    # NOTE(review): this presumably relies on google.com being unreachable
    # from the network where the tests run; elsewhere no URLError is raised
    # and the assertion fails -- confirm the intended environment.
    with self.assertRaises(urllib.error.URLError):
        decode_url('https://www.google.com')