Exemple #1
0
def main():
    """Run a crawl from the command line and print what it found.

    Options are parsed by ``docopt`` from the module docstring, turned into a
    configuration by ``configure`` and used to build a logger and a
    ``Crawler`` for ``cfg["URL"]``. After ``crawl()`` returns, three sections
    are printed in order: errors, redirects and server errors. A section with
    no entries prints ``None``.
    """
    options = docopt(__doc__)
    settings = configure(options)
    log = getLogger(settings)
    crawler = Crawler(settings["URL"], logger=log)
    crawler.crawl()

    def indented(entry):
        # Plain entries are shown indented beneath their key.
        return "    " + entry

    def old_and_new(pair):
        # Redirect entries are (old, new) pairs.
        return "  Old:" + pair[0] + " | New: " + pair[1]

    def report(heading, grouped, render):
        # One section: heading, then each key followed by its rendered entries.
        print(heading)
        if not grouped:
            print("None")
            return
        for origin in grouped:
            print(origin)
            for entry in grouped[origin]:
                print(render(entry))

    report("Errors found: ", crawler.errors, indented)
    report(
        "\nRedirects found (may want to update): ",
        crawler.redirects,
        old_and_new,
    )
    report("\nServer Errors returned: ", crawler.server_errors, indented)
Exemple #2
0
def test_check_500_source():
    """A 500 response with a source is filed under that source as a server error."""
    response = mock.Mock(status_code=500, history=None)

    crawler = Crawler(localhost)
    result = crawler.check(response, url='/test', source=localhost)

    # check() yields nothing, and only the server-error bucket is touched.
    assert result is None
    assert crawler.server_errors[localhost] == ['/test']
    assert not crawler.errors
    assert not crawler.redirects
Exemple #3
0
def test_check_404_no_source():
    """A 404 checked without url/source is recorded as '' under the ``None`` key."""
    response = mock.Mock(status_code=404, history=None)

    crawler = Crawler(localhost)
    result = crawler.check(response)

    # check() yields nothing, and only the errors bucket is touched.
    assert result is None
    assert crawler.errors[None] == ['']
    assert not crawler.server_errors
    assert not crawler.redirects
Exemple #4
0
def test_check_204():
    """A 204 response is handed back unchanged and nothing is recorded."""
    response = mock.Mock(status_code=204, history=None)

    crawler = Crawler(localhost)
    result = crawler.check(response)

    assert result == response
    # None of the three buckets may have gained an entry.
    assert not crawler.errors
    assert not crawler.server_errors
    assert not crawler.redirects
Exemple #5
0
def test_check_301_source():
    """A 200 reached via a 301 hop is returned and logged as a redirect for its source."""
    hop = mock.Mock(status_code=301)
    response = mock.Mock(
        status_code=200,
        url=localhost + '/redir',
        history=[hop],
    )

    crawler = Crawler(localhost)
    result = crawler.check(response, url='/test', source=localhost)

    assert result == response
    assert not crawler.server_errors
    assert not crawler.errors
    # The redirect entry pairs the requested url with the final response url.
    assert crawler.redirects[localhost] == [('/test', response.url)]