def test_for_list_of_all_valid_links(self):
    """Check put_all_compiled_links_in_list() against a hand-flattened list.

    The expected value is built by running the pipeline step by step
    (setup -> raw_compiled_soups -> get_all_compiled_links) and flattening
    every group of links it yields into a single list, in iteration order.
    """
    base_soup = hundredgreatest.setup(hundredgreatest.BASEURL)
    soups = hundredgreatest.raw_compiled_soups(base_soup)
    link_groups = hundredgreatest.get_all_compiled_links(soups)
    # Flatten one level: each group is itself an iterable of links.
    expected_links = [link for group in link_groups for link in group]
    self.assertEqual(
        hundredgreatest.put_all_compiled_links_in_list(), expected_links)
def test_compiled_soups(self):
    """Check raw_compiled_soups() against soups fetched directly by this test.

    The expected URLs are BEST_NOVELS itself plus one "?page=<n>" URL for
    every value returned by get_follow_on_link_numbers(self.soup). Each URL
    is downloaded and parsed here, and the resulting set is compared with
    the return value of raw_compiled_soups(self.soup).

    NOTE(review): this test issues live HTTP requests via urllib.
    """
    # For example: {BEST_NOVELS, BEST_NOVELS + "?page=2",
    #               BEST_NOVELS + "?page=3", BEST_NOVELS + "?page=4"}
    valid_urls = {
        "{0}{1}{2}".format(BEST_NOVELS, "?page=", page_number)
        for page_number in hundredgreatest.get_follow_on_link_numbers(self.soup)
    }
    valid_urls.add(BEST_NOVELS)

    compiled_soups = set()
    for url in valid_urls:
        # Use the response as a context manager so the connection is closed
        # as soon as the body has been read, even if read() raises.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        # NOTE(review): no parser is named here; left unchanged so parsing
        # stays consistent with whatever the code under test does - confirm.
        compiled_soups.add(BeautifulSoup(html))

    self.assertEqual(
        hundredgreatest.raw_compiled_soups(self.soup), compiled_soups)
def test_get_all_novel_titles(self):
    """Check get_all_novel_titles() against titles extracted by regex here.

    All links produced by the pipeline (setup -> raw_compiled_soups ->
    get_all_compiled_links) are flattened into one list. Every link that
    matches the "The 100 best novels: No..." anchor pattern contributes its
    captured anchor text to the expected list, in iteration order.
    """
    link_groups = hundredgreatest.get_all_compiled_links(
        hundredgreatest.raw_compiled_soups(
            hundredgreatest.setup(hundredgreatest.BASEURL)))
    # Flatten one level: each group is itself an iterable of links.
    all_links = [link for group in link_groups for link in group]

    # Only the novel-title pattern feeds the assertion; the generic
    # href/text extraction that used to run alongside it was never used.
    regex_for_novel_titles = re.compile(
        r"<a.*>(The 100 best novels: No.*)</a>")
    title_matches = (regex_for_novel_titles.search(link) for link in all_links)
    novel_only_titles = [found.group(1) for found in title_matches if found]

    self.assertEqual(
        hundredgreatest.get_all_novel_titles(), novel_only_titles)
def test_get_links_in_all_compiled_raw_soups(self):
    """Check get_all_compiled_links() against a per-soup call of all_links_as_strings().

    The expected value holds one all_links_as_strings(soup) result for each
    soup in raw_compiled_soups(self.soup), in iteration order.
    """
    compiled_soups = hundredgreatest.raw_compiled_soups(self.soup)
    expected_links = [
        hundredgreatest.all_links_as_strings(soup) for soup in compiled_soups
    ]
    self.assertEqual(
        hundredgreatest.get_all_compiled_links(compiled_soups), expected_links)