def test_link_checker(self):
    """Verify that HttpClient.is_link_ok accepts well-formed HTTP(S) URLs
    and rejects malformed or non-HTTP ones."""
    valid_urls = [
        'http://domain.com',
        'http://example.com/path/to/page?name=ferret&color=purple',
        'https://docs.python.org/2/library/unittest.html',
        'http://127.0.0.1:8080',
        'http://localhost:8080',
        'http://web-nginx.example.com:80',
    ]
    # BUGFIX: the original wrote 'file:///etc/fstab,' — the comma meant as a
    # list separator was inside the string literal, so the trailing-comma URL
    # was tested instead of the plain file:// URL.
    invalid_urls = [
        'domain.com',         # missing scheme
        'http://domain',      # bare host, no TLD
        'http://.com',        # empty host label
        'file:///etc/fstab',  # non-HTTP scheme
    ]
    # test valid urls; msg=url identifies the offending URL on failure
    for url in valid_urls:
        self.assertTrue(HttpClient.is_link_ok(url), msg=url)
    # test invalid urls
    for url in invalid_urls:
        self.assertFalse(HttpClient.is_link_ok(url), msg=url)
def page_links(self, link):
    """
    Load the page at *link* in the browser driver, collect the ``href``
    attribute of every ``<a>`` element, and return the ones that pass
    ``HttpClient.is_link_ok``.

    :param link: web page (URL) to be processed
    :return: list of parsed, validated URLs
    """
    self.driver.get(link)
    # BUGFIX: the original loop reused the name ``link`` for each anchor
    # element, shadowing (and clobbering) the method's ``link`` parameter.
    anchors = self.driver.find_elements_by_tag_name('a')
    hrefs = [
        url
        for url in (anchor.get_attribute("href") for anchor in anchors)
        if HttpClient.is_link_ok(url)
    ]
    # pause between page fetches (rate limiting), as in the original flow
    self.wait()
    return hrefs