def test_is_internal_links(base, url):
    """Neither address may report the other as external, as str or as URL."""
    directions = ((base, url), (url, base))

    # Counterpart passed as given.
    for origin, other in directions:
        assert not Link(origin).is_external(other)

    # Counterpart wrapped into a URL object.
    for origin, other in directions:
        assert not Link(origin).is_external(URL(other))
def test_not_available_page():
    """A link whose connection cannot be established does not exist."""
    unreachable = Link("http://127.0.0.1:79")

    # Nothing has been checked yet, so the status is still undefined.
    assert unreachable.status == Status.UNDEFINED

    assert not unreachable.exists()
    assert "Failed to establish a new connection" in unreachable.message
def test_order_nonurl_type():
    """Comparing a Link with an int (== or >) must raise TypeError."""
    address = "https://example.com"

    with pytest.raises(TypeError):
        assert Link(address) == 1

    with pytest.raises(TypeError):
        assert 1 > Link(address)
def test_referrer():
    """A referrer added to a link (here: twice) is listed by get_referrers()."""
    target = Link("https://made.ua")
    source_page = "https://example.com"

    # The same referrer is registered two times in a row.
    for _ in range(2):
        target.add_referrer(source_page)

    assert source_page in target.get_referrers()
def test_same_url(server):
    """The routed address must appear among the links collected from its page."""
    template = "<a href='http://{}:{}'>same link</a>, <a href='/'>a</a>"
    body = template.format(*server.sa)
    routes = {
        '^/$': Page(body).exists(),
        '^/link$': Page("ok").exists(),
    }
    address = server.router(routes)

    root = Link(address)
    assert root.exists()
    assert address in root.links
def test_eq():
    """Equality against Link and str operands; an int operand raises TypeError."""
    address = "http://example.com"

    # Link vs Link, then Link vs str in both operand orders.
    assert Link(address) == Link(address)
    assert Link(address) == address
    assert address == Link(address)

    with pytest.raises(TypeError):
        Link('http://example.com') == 1  # pylint: disable=expression-not-assigned
def test_index():
    """Exercise Index: len(), put(), iteration, membership and update()."""
    index = Index()
    assert len(index) == 0

    # Eight addresses in total; "https://google.com" is listed three times.
    addresses = (
        "https://google.com",
        "https://google.com",
        "https://google.com",
        "http://google.com",
        "https://google.de",
        "http://google.de",
        "https://google.es",
        "http://google.es",
    )
    links = [Link(address) for address in addresses]  # type: List[Link]
    for link in links:
        index.put(link)

    # Number of entries expected after putting all eight links.
    assert len(index) == 6

    # Covers both __iter__ and __contains__.
    for stored in index:
        assert stored in index

    index.update(Link("http://google.fr"), Status.UNDEFINED, "no idea")
def test_link_nl(server):
    """A new line inside an href is ignored, as browsers do: '/li\\nnk' -> '/link'."""
    routes = {
        '^/$': Page("<a href='/li\nnk'>a</a>").exists(),
        '^/link$': Page("ok").exists(),
    }
    page = Link(server.router(routes))

    # The result is not asserted here; the call is made before reading links.
    page.exists()

    assert "/link" in page.links
def test_links(server):
    """General Link checks: existence, collected links, str() and url()."""
    routes = {
        '^/$': Page('<a href="https://example.com/">test</a>').exists(),
    }
    url = server.router(routes)

    page = Link(url)
    assert page.exists()

    # The served page contains exactly one anchor.
    assert len(page.links) == 1

    # Both string representations give back the routed address.
    assert str(page) == url
    assert page.url() == url
def test_redirected_page(server):
    """exists() on a redirecting page must raise DeadlinksRedirectionURL."""
    routes = {
        '^/$': Page("").redirects(pattern="https://example.com/?%s"),
    }
    address = server.router(routes)

    redirected = Link(address)
    assert redirected.status == Status.UNDEFINED

    with pytest.raises(DeadlinksRedirectionURL):
        redirected.exists()

    # A plain int is rejected as a status value.
    with pytest.raises(TypeError):
        redirected.status = 0
def test_order_eq(params_l1_eq_l2):
    """Equal pairs stay equal as raw values, Link vs raw, and Link vs Link."""
    from operator import eq

    first, second = params_l1_eq_l2
    wrapped_first = Link(first)
    wrapped_second = Link(second)

    pairs = (
        (first, second),
        (wrapped_first, second),
        (wrapped_first, wrapped_second),
    )
    for left, right in pairs:
        # Check both the operator syntax and the functional form.
        assert left == right
        assert eq(left, right)
def test_is_external(base, url):
    """Both addresses must report each other as external for every argument kind."""
    directions = ((base, url), (url, base))

    # Counterpart wrapped as URL first, then as Link.
    for wrap in (URL, Link):
        for origin, other in directions:
            assert Link(origin).is_external(wrap(other))

    # Counterpart passed as given.
    for origin, other in directions:
        assert Link(origin).is_external(other)
def test_is_schema_valid():
    """Every listed address must be reported as having a valid schema."""
    addresses = (
        "http://example.com",
        "https://example.com",
        "sftp://example.com",
        "ssh://example.com",
        "ws://example.com",
        "news://example.com",
        # NOTE(review): this address looks like an e-mail obfuscation
        # artifact - confirm the intended literal.
        "mailto:[email protected]",
    )
    for address in addresses:
        assert Link(address).is_schema_valid()
def test_order_neq(params_l1_lt_l2):
    """Ordered pairs keep their order as raw values, Link vs raw, Link vs Link."""
    from operator import lt, gt

    smaller, larger = params_l1_lt_l2
    wrapped_smaller = Link(smaller)
    wrapped_larger = Link(larger)

    pairs = (
        (smaller, larger),
        (wrapped_smaller, larger),
        (wrapped_smaller, wrapped_larger),
    )
    for low, high in pairs:
        # "Less than" in operator and functional form ...
        assert low < high
        assert lt(low, high)
        # ... and the mirrored "greater than".
        assert high > low
        assert gt(high, low)
def test_ignored_page(server):
    """exists() must raise DeadlinksIgnoredURL once the status is IGNORED."""
    routes = {
        '^/$': Page("").exists(),
    }
    ignored = Link(server.router(routes))
    assert ignored.status == Status.UNDEFINED

    # Mark the link as ignored and make sure the status sticks.
    ignored.status = Status.IGNORED
    assert ignored.status == Status.IGNORED

    with pytest.raises(DeadlinksIgnoredURL):
        ignored.exists()

    # A plain int is rejected as a status value.
    with pytest.raises(TypeError):
        ignored.status = 3
def test_existing_page(server):
    """A page served by an emulated slow server is still reported as existing."""
    routes = {
        '^/$': Page("").slow().exists(),
    }
    slow_page = Link(server.router(routes))
    assert slow_page.status == Status.UNDEFINED
    assert slow_page.exists()

    # Setting FOUND explicitly keeps the link existing.
    slow_page.status = Status.FOUND
    assert slow_page.exists()

    # A plain int is rejected as a status value.
    with pytest.raises(TypeError):
        slow_page.status = 1
def test_not_existing_page(server):
    """An emulated slow, broken server makes the link report as not existing."""
    routes = {
        '^/$': Page("").unlock_after(3).slow().exists(),
    }
    broken = Link(server.router(routes))
    assert broken.status == Status.UNDEFINED

    # Expected to time out within the allowed retries.
    assert not broken.exists(retries=2)

    # Assign the new status explicitly.
    broken.status = Status.NOT_FOUND

    # The page is unlocked by now, but the earlier response is cached.
    assert not broken.exists()

    # A plain int is rejected as a status value.
    with pytest.raises(TypeError):
        broken.status = 2
def test_url_link(base, url, expected):
    """Link(base).link(url) must equal ``expected`` (relative link generation)."""
    generated = Link(base).link(url)
    assert generated == expected
def test_is_crawlable():
    """http/https addresses are crawlable; ws/ssh addresses are not."""
    for crawlable in ("http://example.com", "https://example.com"):
        assert Link(crawlable).is_crawlable()

    for skipped in ("ws://example.com", "ssh://example.com"):
        assert not Link(skipped).is_crawlable()
def test_basepath_within_external(url, internal_link):
    """Neither address, taken as BaseURL, may contain the other one as a Link."""
    for base, candidate in ((url, internal_link), (internal_link, url)):
        assert not BaseURL(base).within(Link(candidate))
def test_match_domain():
    """match_domains() accepts the link's own domain and rejects a foreign one."""
    site = Link("https://made.ua")
    own_domains = ["made.ua"]
    foreign_domains = ["example.com"]

    assert site.match_domains(own_domains)
    assert not site.match_domains(foreign_domains)
def test_base_update(settings):
    """Assigning ``settings.base`` must raise DeadlinksSettingsBase."""
    with pytest.raises(DeadlinksSettingsBase):
        replacement = Link("http://google.com")
        settings.base = replacement
def test_bad_links(url):
    """A link built from a bad URL must not be reported as existing."""
    bad = Link(url)
    assert not bad.exists()
def link():
    """Return a Link object for https://example.com."""
    example = Link("https://example.com")
    return example
def test_ignored(ignore_domains, ignore_pathes, url):
    """The URL must match both the ignored domains and the ignored pathes."""
    domain_matched = Link(url).match_domains(ignore_domains)
    assert domain_matched

    path_matched = Link(url).match_pathes(ignore_pathes)
    assert path_matched
def test_is_external_of_wrong_type(base, url):
    """The (mis)typed is_external() check is expected to raise TypeError."""
    with pytest.raises(TypeError):
        origin = Link(base)
        assert origin.is_external(url)
def test_non_string_message():
    """Assigning an int to a Link's ``message`` must raise TypeError."""
    with pytest.raises(TypeError):
        target = Link("http://example.com/")
        target.message = 404
def test_is_valid(url):
    """The supplied URL must be in a valid (for the crawler) format."""
    candidate = Link(url)
    assert candidate.is_valid()