def test_find_all_follow_on_links(self):
    """get_follow_on_link_numbers must return the set of page numbers
    appearing in the '?page=N' follow-on links of the fixture soup.

    The expected set is rebuilt here independently by scanning every
    anchor tag with a regex.
    """
    all_links = hundredgreatest.get_links_in_soup(self.soup)
    # \d+ (not \d) so a page number of 10 or more is captured in full
    # instead of being truncated to its first digit.
    pattern = re.compile(r"^<a.*(\S+)(\?page=)(\d+)\S+</a>")
    expected_numbers = set()
    for link in all_links:
        m = pattern.match(str(link))
        if m:
            expected_numbers.add(m.group(3))
    self.assertEqual(
        hundredgreatest.get_follow_on_link_numbers(self.soup),
        expected_numbers,
    )
def test_compiled_soups(self):
    """raw_compiled_soups must return one parsed soup per valid URL:
    the base listing page plus every '?page=N' follow-on page.

    Each URL is fetched and parsed here independently to build the
    expected set. NOTE(review): this test performs live HTTP requests.
    """
    valid_urls = {
        "{0}{1}{2}".format(BEST_NOVELS, "?page=", n)
        for n in hundredgreatest.get_follow_on_link_numbers(self.soup)
    }
    valid_urls.add(BEST_NOVELS)
    compiled_soups = set()
    for url in valid_urls:
        # Context manager closes the HTTP response promptly instead of
        # leaking the underlying socket until garbage collection.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        # Name the parser explicitly: bare BeautifulSoup(html) guesses a
        # parser (emitting a warning) and may pick a different one per
        # machine, making the test nondeterministic.
        compiled_soups.add(BeautifulSoup(html, "html.parser"))
    self.assertEqual(hundredgreatest.raw_compiled_soups(self.soup), compiled_soups)
def test_list_of_all_valid_urls(self):
    """valid_urls_to_soupify should yield the base URL plus one
    '?page=N' URL for each follow-on link number found in the soup."""
    page_numbers = hundredgreatest.get_follow_on_link_numbers(self.soup)
    expected = {BEST_NOVELS}
    for number in page_numbers:
        expected.add("{0}{1}{2}".format(BEST_NOVELS, "?page=", number))
    self.assertEqual(hundredgreatest.valid_urls_to_soupify(self.soup), expected)