def test_list_resources(url, threads=1, max_page_depth=None, max_pages_to_crawl=None):
    """Crawl *url* and check CrawlerHandler.list_resources behavior.

    Verifies two things:
      1. Querying resources for a page that was never crawled (the home page
         URL with a random suffix) yields only empty resource sets.
      2. Querying resources for the crawled home page yields at least one
         resource overall.

    Args:
        url: Entry-point URL handed to the crawler.
        threads: Number of crawler threads (default 1).
        max_page_depth: Optional crawl-depth limit (None = unlimited).
        max_pages_to_crawl: Optional cap on pages visited (None = unlimited).

    Raises:
        AssertionError: If either expectation above is violated.
    """
    handler = CrawlerHandler()
    home_page = handler.start_crawling(url, threads, max_page_depth, max_pages_to_crawl, 0)

    # Look for a page that doesn't exist: a random suffix makes the key
    # (practically) guaranteed to miss, so every resource set must be empty.
    resources = handler.list_resources(home_page + str(random()))
    for resource_set in resources.values():
        assert len(resource_set) == 0

    # Look for the home page, which DOES exist: the union of all resource
    # sets must be non-empty. set().union(*...) is safe even if the dict is
    # empty (a bare reduce without an initializer would raise TypeError).
    resources = handler.list_resources(home_page)
    assert len(set().union(*resources.values())) > 0  # at least some resource should be found
def test_crawler(url, threads=1, max_page_depth=None, max_pages_to_crawl=None):
    """Run a full crawl of *url* and return every resource found.

    Args:
        url: Entry-point URL handed to the crawler.
        threads: Number of crawler threads (default 1).
        max_page_depth: Optional crawl-depth limit (None = unlimited).
        max_pages_to_crawl: Optional cap on pages visited (None = unlimited).

    Returns:
        The mapping produced by CrawlerHandler.list_resources() after the
        crawl completes.
    """
    crawler = CrawlerHandler()
    crawler.start_crawling(url, threads, max_page_depth, max_pages_to_crawl, 0)
    return crawler.list_resources()