Ejemplo n.º 1
0
def test_is_internal_links(base, url):
    """ Are these links internal to each other?

    Both directions are checked, first with the other address passed as
    given, then with it wrapped in ``URL``.
    """
    directions = ((base, url), (url, base))

    # Other side passed through unchanged.
    for origin, other in directions:
        assert not Link(origin).is_external(other)

    # Other side wrapped in URL.
    for origin, other in directions:
        assert not Link(origin).is_external(URL(other))
Ejemplo n.º 2
0
def test_not_available_page():
    """ A link to 127.0.0.1:79 does not exist and reports a connection failure. """
    unreachable = Link("http://127.0.0.1:79")

    # Nothing has been checked yet, so the status is still undefined.
    assert unreachable.status == Status.UNDEFINED

    assert not unreachable.exists()
    assert "Failed to establish a new connection" in unreachable.message
Ejemplo n.º 3
0
def test_order_nonurl_type():
    """ Comparing a Link with an integer must raise TypeError. """
    address = "https://example.com"

    # Equality with the Link on the left-hand side.
    with pytest.raises(TypeError):
        assert Link(address) == 1

    # Ordering with the Link on the right-hand side.
    with pytest.raises(TypeError):
        assert 1 > Link(address)
Ejemplo n.º 4
0
def test_referrer():
    """ A referrer added to a link is reported by get_referrers(). """
    source = "https://example.com"
    target = Link("https://made.ua")

    # The same referrer is added twice before membership is checked.
    for _ in range(2):
        target.add_referrer(source)

    known = target.get_referrers()
    assert source in known
Ejemplo n.º 5
0
def test_same_url(server):
    """ A page linking to the routed address lists that address in its links. """
    # The first anchor is filled in from server.sa; the second is the root path.
    template = "<a href='http://{}:{}'>same link</a>, <a href='/'>a</a>"
    routes = {
        '^/$': Page(template.format(*server.sa)).exists(),
        '^/link$': Page("ok").exists(),
    }
    address = server.router(routes)

    root = Link(address)
    assert root.exists()
    assert address in root.links
Ejemplo n.º 6
0
def test_eq():
    """ Link equals an equal Link or str (either side); an int raises TypeError. """
    address = "http://example.com"

    assert Link(address) == Link(address)
    assert Link(address) == address
    assert address == Link(address)

    # The comparison result is irrelevant; only the raised error matters.
    with pytest.raises(TypeError):
        Link(address) == 1  # pylint: disable=expression-not-assigned
Ejemplo n.º 7
0
def test_index():
    """ Exercise Index: length, put, iteration, membership and update. """
    index = Index()
    assert len(index) == 0

    addresses = (
        "https://google.com",
        "https://google.com",
        "https://google.com",
        "http://google.com",
        "https://google.de",
        "http://google.de",
        "https://google.es",
        "http://google.es",
    )
    links = [Link(address) for address in addresses]

    for entry in links:
        index.put(entry)

    # Three of the eight addresses are the same, so six unique links remain.
    assert len(index) == 6

    # Iteration goes through __iter__, the membership test through __contains__.
    for entry in index:
        assert entry in index

    index.update(Link("http://google.fr"), Status.UNDEFINED, "no idea")
Ejemplo n.º 8
0
def test_link_nl(server):
    """ A newline inside an href is ignored, the way browsers ignore it. """
    routes = {
        '^/$': Page("<a href='/li\nnk'>a</a>").exists(),
        '^/link$': Page("ok").exists(),
    }
    address = server.router(routes)

    root = Link(address)
    # The result is not asserted; presumably this call is what populates `links`.
    root.exists()

    assert "/link" in root.links
Ejemplo n.º 9
0
def test_links(server):
    """ Basic checks: existence, link count and string forms of a Link. """
    routes = {
        '^/$': Page('<a href="https://example.com/">test</a>').exists(),
    }
    url = server.router(routes)
    root = Link(url)

    assert root.exists()

    # The served page contains exactly one anchor.
    assert len(root.links) == 1

    # Both string representations match the address the link was built from.
    assert str(root) == url
    assert root.url() == url
Ejemplo n.º 10
0
def test_redirected_page(server):
    """ exists() must raise DeadlinksRedirectionURL for a redirecting page. """
    routes = {
        '^/$': Page("").redirects(pattern="https://example.com/?%s"),
    }
    redirected = Link(server.router(routes))

    assert redirected.status == Status.UNDEFINED

    with pytest.raises(DeadlinksRedirectionURL):
        redirected.exists()

    # A plain integer is rejected as a status value.
    with pytest.raises(TypeError):
        redirected.status = 0
Ejemplo n.º 11
0
def test_order_eq(params_l1_eq_l2):
    """ Equal values stay equal when one or both sides are wrapped in Link. """
    from operator import eq

    first, second = params_l1_eq_l2

    # Values as supplied.
    assert first == second
    assert eq(first, second)

    # Only the left side wrapped.
    first_link = Link(first)
    assert first_link == second
    assert eq(first_link, second)

    # Both sides wrapped.
    second_link = Link(second)
    assert first_link == second_link
    assert eq(first_link, second_link)
Ejemplo n.º 12
0
def test_is_external(base, url):
    """ `base` and `url` are external to each other in every accepted form. """
    # The other side wrapped in URL, then in Link.
    for wrap in (URL, Link):
        assert Link(base).is_external(wrap(url))
        assert Link(url).is_external(wrap(base))

    # The other side passed through unchanged.
    assert Link(base).is_external(url)
    assert Link(url).is_external(base)
Ejemplo n.º 13
0
def test_is_schema_valid():
    """ Every listed address must be reported as having a valid schema. """
    addresses = (
        "http://example.com",
        "https://example.com",
        "sftp://example.com",
        "ssh://example.com",
        "ws://example.com",
        "news://example.com",
        # NOTE(review): this address looks mangled by e-mail obfuscation - confirm.
        "mailto:[email protected]",
    )

    for address in addresses:
        assert Link(address).is_schema_valid()
Ejemplo n.º 14
0
def test_order_neq(params_l1_lt_l2):
    """ Ordering is kept when one or both sides are wrapped in Link. """
    from operator import lt, gt

    smaller, larger = params_l1_lt_l2

    # Values as supplied.
    assert smaller < larger
    assert lt(smaller, larger)
    assert larger > smaller
    assert gt(larger, smaller)

    # Only the smaller side wrapped.
    smaller_link = Link(smaller)
    assert smaller_link < larger
    assert lt(smaller_link, larger)
    assert larger > smaller_link
    assert gt(larger, smaller_link)

    # Both sides wrapped.
    larger_link = Link(larger)
    assert smaller_link < larger_link
    assert lt(smaller_link, larger_link)
    assert larger_link > smaller_link
    assert gt(larger_link, smaller_link)
Ejemplo n.º 15
0
def test_ignored_page(server):
    """ A link marked IGNORED raises DeadlinksIgnoredURL on exists(). """
    routes = {'^/$': Page("").exists()}
    ignored = Link(server.router(routes))

    assert ignored.status == Status.UNDEFINED

    ignored.status = Status.IGNORED
    assert ignored.status == Status.IGNORED

    with pytest.raises(DeadlinksIgnoredURL):
        ignored.exists()

    # A plain integer is rejected as a status value.
    with pytest.raises(TypeError):
        ignored.status = 3
Ejemplo n.º 16
0
def test_existing_page(server):
    """ A page served through Page.slow() is still reported as existing. """
    routes = {'^/$': Page("").slow().exists()}
    found = Link(server.router(routes))

    assert found.status == Status.UNDEFINED
    assert found.exists()

    # exists() must still hold after the status is set explicitly.
    found.status = Status.FOUND
    assert found.exists()

    # A plain integer is rejected as a status value.
    with pytest.raises(TypeError):
        found.status = 1
Ejemplo n.º 17
0
def test_not_existing_page(server):
    """ A link that timed out stays "not existing" on later checks. """
    routes = {'^/$': Page("").unlock_after(3).slow().exists()}
    broken = Link(server.router(routes))

    assert broken.status == Status.UNDEFINED

    # Even with two retries the request times out.
    assert not broken.exists(retries=2)

    # Store the new status on the link.
    broken.status = Status.NOT_FOUND

    # The page is unlocked by now, but the cached response is what counts.
    assert not broken.exists()

    # A plain integer is rejected as a status value.
    with pytest.raises(TypeError):
        broken.status = 2
Ejemplo n.º 18
0
def test_url_link(base, url, expected):
    """ Link(base).link(url) must produce the expected address. """
    resolved = Link(base).link(url)
    assert resolved == expected
Ejemplo n.º 19
0
def test_is_crawlable():
    """ http and https links are crawlable; ws and ssh links are not. """
    for address in ("http://example.com", "https://example.com"):
        assert Link(address).is_crawlable()

    for address in ("ws://example.com", "ssh://example.com"):
        assert not Link(address).is_crawlable()
Ejemplo n.º 20
0
def test_basepath_within_external(url, internal_link):
    """ Neither address is within the other, whichever one is the base. """
    for outer, inner in ((url, internal_link), (internal_link, url)):
        assert not BaseURL(outer).within(Link(inner))
Ejemplo n.º 21
0
def test_match_domain():
    """ match_domains() accepts the link's own domain and rejects another. """
    made = Link("https://made.ua")

    own_domain = ["made.ua"]
    assert made.match_domains(own_domain)

    other_domain = ["example.com"]
    assert not made.match_domains(other_domain)
Ejemplo n.º 22
0
def test_base_update(settings):
    """ Assigning a new base to settings must raise DeadlinksSettingsBase. """
    replacement = Link("http://google.com")

    with pytest.raises(DeadlinksSettingsBase):
        settings.base = replacement
Ejemplo n.º 23
0
def test_bad_links(url):
    """ A link built from a bad url must not exist. """
    candidate = Link(url)
    assert not candidate.exists()
Ejemplo n.º 24
0
def link():
    """ Return a Link for https://example.com. """
    example = Link("https://example.com")
    return example
Ejemplo n.º 25
0
def test_ignored(ignore_domains, ignore_pathes, url):
    """ The url must match both the ignored domains and the ignored pathes. """
    domain_matched = Link(url).match_domains(ignore_domains)
    assert domain_matched

    path_matched = Link(url).match_pathes(ignore_pathes)
    assert path_matched
Ejemplo n.º 26
0
def test_is_external_of_wrong_type(base, url):
    """ is_external() with (mis)typed arguments must raise TypeError. """
    # The Link is built inside the block so an error from either step counts.
    with pytest.raises(TypeError):
        assert Link(base).is_external(url)
Ejemplo n.º 27
0
def test_non_string_message():
    """ Assigning a non-string message to a Link must raise TypeError. """
    target = Link("http://example.com/")

    with pytest.raises(TypeError):
        target.message = 404
Ejemplo n.º 28
0
def test_is_valid(url):
    """ The url must have a valid (for the crawler) format. """
    candidate = Link(url)
    assert candidate.is_valid()