Example #1
def crawl_page(thread_name, page_url):
    # Skip pages that have already been crawled
    if page_url not in Spider.crawled:
        print(thread_name + ' now crawling ' + page_url)
        print('Queue size: ' + str(len(Spider.queue)) + ' | Crawled files: ' +
              str(len(Spider.crawled)))

        # Collect the links found on this page and add any new ones to the queue
        Spider.add_links_to_queue(Spider.gather_links(page_url))
        # Move the page from the waiting queue to the crawled set
        Spider.queue.remove(page_url)
        Spider.crawled.add(page_url)
        # Persist the updated queue and crawled sets
        Spider.update_files()
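
The function above depends on a Spider class defined elsewhere. The following is a minimal sketch of the interface it assumes: class-level queue and crawled sets plus the gather_links, add_links_to_queue, and update_files helpers. The method bodies here are placeholders and assumptions for illustration, not the original implementation.

# Hypothetical sketch of the Spider interface assumed by crawl_page.
class Spider:
    queue = set()     # URLs waiting to be crawled
    crawled = set()   # URLs already crawled

    @staticmethod
    def gather_links(page_url):
        # Placeholder: fetch page_url and return the set of links found on it
        return set()

    @staticmethod
    def add_links_to_queue(links):
        # Queue only links that have not been seen or crawled before
        for link in links:
            if link not in Spider.queue and link not in Spider.crawled:
                Spider.queue.add(link)

    @staticmethod
    def update_files():
        # Placeholder: persist the queue and crawled sets to disk
        pass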