def testRequestAllLink(self):
    """Print the LinkChecker-validated form of every link on a live page.

    Downloads the page source through ``LinkChecker.get_page_source``
    (custom agent and from-address, ``retries=0``), extracts its links
    with ``LinkChecker.get_all_links_from_source`` and, for each link,
    prints what ``LinkChecker.get_valid_link`` returns. Nothing is
    asserted; the output is meant to be inspected manually.
    """
    page_source = LinkChecker.get_page_source(
        "http://www.jehovahs-witness.com",
        agent="VegeBot-Careful",
        from_src="*****@*****.**",
        retries=0,
    )
    for raw_link in LinkChecker.get_all_links_from_source(page_source):
        # The first two fields of the split result are the scheme and
        # the network location; the raw (unstripped) link is what gets
        # split, while the stripped link is what gets validated.
        scheme, domain = urlsplit(raw_link)[:2]
        resolved = LinkChecker.get_valid_link(domain, raw_link.strip(), scheme)
        print(resolved)
def testRequestAllLink(self):
    """Fetch a live page and print each of its links after validation.

    The page is requested once via ``LinkChecker.get_page_source`` with
    a custom agent, a from-address and ``retries=0``. Every link
    returned by ``LinkChecker.get_all_links_from_source`` is split with
    ``urlsplit`` and handed to ``LinkChecker.get_valid_link``; the
    result is printed. The method makes no assertions.
    """
    fetch_options = {
        "agent": "VegeBot-Careful",
        "from_src": "*****@*****.**",
        "retries": 0,
    }
    html = LinkChecker.get_page_source(
        "http://www.jehovahs-witness.com", **fetch_options)
    for href in LinkChecker.get_all_links_from_source(html):
        split_href = urlsplit(href)
        # Index 0 is the scheme, index 1 the network location (domain).
        print(LinkChecker.get_valid_link(
            split_href[1], href.strip(), split_href[0]))
def test_get_all_links(self):
    """Print every link extracted from an archived snapshot page.

    Downloads the page source for a fixed web.archive.org URL with
    ``LinkChecker.get_page_source`` (default options), extracts the
    links with ``LinkChecker.get_all_links_from_source`` and prints
    them one per line. The method makes no assertions.
    """
    archived_url = (
        "http://web.archive.org/web/20140711025724/http://susodigital.com/"
    )
    page_source = LinkChecker.get_page_source(archived_url)
    for found_link in LinkChecker.get_all_links_from_source(page_source):
        print(found_link)