def test_make_absolute():
    """Check ``Page.make_absolute`` against a table of URL resolutions.

    Each table row is ``(base_url, relative_url, expected)``: the page's
    base is forced to ``base_url`` and ``make_absolute(relative_url)`` must
    return ``expected``.
    """
    # A single Page instance is reused; only its base is swapped per row.
    page = Page(Response(status_code=200, request=Request("GET", "http://base.url")))

    cases = [
        ("http://base.url", "relative", "http://base.url/relative"),
        ("http://base.url", ".", "http://base.url/"),
        ("http://base.url/with_folder", ".", "http://base.url/"),
        ("http://base.url/with_folder", "./with_dot", "http://base.url/with_dot"),
        ("http://base.url/with_folder", "..", "http://base.url/"),
        ("http://base.url/with_folder", "../folder", "http://base.url/folder"),
        ("http://base.url", "http://whole.url", "http://whole.url/"),
        ("http://base.url", "https://whole.url", "https://whole.url/"),
        ("http://base.url", "http://whole.url:987", "http://whole.url:987/"),
        ("http://base.url", "https://whole.url:987", "https://whole.url:987/"),
        ("http://base.url", "/", "http://base.url/"),
        ("http://base.url", "//", ""),
        ("http://base.url", "//only_this", "http://only_this/"),
        ("http://base.url", "./..//", "http://base.url/"),
        (
            "http://base.url",
            "./wrong_folder/../good_folder/",
            "http://base.url/good_folder/",
        ),
    ]

    for base_url, relative_url, expected in cases:
        # The test overrides the private base directly instead of building
        # a new response for every row.
        page._base = base_url
        actual = page.make_absolute(relative_url)
        assert actual == expected, \
            f"Absolute url from base_url='{base_url}' and relative_url='{relative_url}' is not '{expected}'"
def _extract_disconnect_urls(self, page: Page) -> List[str]:
    """Collect the disconnect links found on ``page`` as absolute URLs.

    A link from ``page.links`` is kept when ``self.is_in_scope(link)`` does
    not return ``False`` and the link matches ``DISCONNECT_REGEX``. Kept
    links are converted with ``page.make_absolute`` and returned in the
    order they appear in ``page.links``.
    """
    return [
        page.make_absolute(candidate)
        for candidate in page.links
        # Identity check on purpose: only an explicit False excludes a link.
        if self.is_in_scope(candidate) is not False
        and re.search(DISCONNECT_REGEX, candidate) is not None
    ]