def main():
    """Crawl the configured URL and print a three-part report.

    Command-line arguments are parsed with ``docopt`` from the module
    docstring, turned into a configuration by ``configure``, and the
    ``"URL"`` entry of that configuration is crawled. Afterwards the
    crawler's ``errors``, ``redirects`` and ``server_errors`` mappings are
    printed, each grouped by key; an empty mapping is reported as "None".
    """
    arguments = docopt(__doc__)
    cfg = configure(arguments)
    log = getLogger(cfg)

    crawler = Crawler(cfg["URL"], logger=log)
    crawler.crawl()

    # Section 1: entries recorded in crawler.errors, grouped by key.
    print("Errors found: ")
    if crawler.errors:
        for origin in crawler.errors:
            print(origin)
            for broken in crawler.errors[origin]:
                print(" " + broken)
    else:
        print("None")

    # Section 2: redirects; each entry is indexed as (old, new).
    print("\nRedirects found (may want to update): ")
    if crawler.redirects:
        for origin in crawler.redirects:
            print(origin)
            for pair in crawler.redirects[origin]:
                print(" Old:" + pair[0] + " | New: " + pair[1])
    else:
        print("None")

    # Section 3: entries recorded in crawler.server_errors, grouped by key.
    print("\nServer Errors returned: ")
    if crawler.server_errors:
        for origin in crawler.server_errors:
            print(origin)
            for failing in crawler.server_errors[origin]:
                print(" " + failing)
    else:
        print("None")
def test_check_500_source():
    """A 500 response with a source is recorded only as a server error."""
    response = mock.Mock(status_code=500, history=None)
    crawler = Crawler(localhost)

    result = crawler.check(response, url='/test', source=localhost)

    # check() yields nothing and files the URL under its source page.
    assert result is None
    assert crawler.server_errors[localhost] == ['/test']
    # The other two collections stay empty.
    assert not crawler.errors
    assert not crawler.redirects
def test_check_404_no_source():
    """A 404 response checked without url/source lands in ``errors``."""
    response = mock.Mock(status_code=404, history=None)
    crawler = Crawler(localhost)

    result = crawler.check(response)

    assert result is None
    # With no url/source passed, the entry is '' under the key None.
    assert crawler.errors[None] == ['']
    assert not crawler.server_errors
    assert not crawler.redirects
def test_check_204():
    """A 204 response is handed back and nothing is recorded."""
    response = mock.Mock(status_code=204, history=None)
    crawler = Crawler(localhost)

    result = crawler.check(response)

    assert result == response
    # None of the three collections should have been touched.
    assert not crawler.errors
    assert not crawler.server_errors
    assert not crawler.redirects
def test_check_301_source():
    """A 200 response reached via a 301 hop is recorded as a redirect."""
    hop = mock.Mock(status_code=301)
    response = mock.Mock(
        status_code=200,
        url=localhost + '/redir',
        history=[hop],
    )
    crawler = Crawler(localhost)

    result = crawler.check(response, url='/test', source=localhost)

    # The final response is returned unchanged.
    assert result == response
    assert not crawler.server_errors
    assert not crawler.errors
    # The redirect is stored as (requested url, final url) under the source.
    assert crawler.redirects[localhost] == [('/test', response.url)]