Beispiel #1
0
class Crawler:
    """Multi-threaded crawler that follows links starting from one origin URL.

    URLs waiting to be fetched are kept in a ``Work`` container (defined
    outside this block). It is only used here through ``put(url)`` and
    ``get(timeout=...)``; ``get`` is expected to raise ``Empty`` when nothing
    arrives in time -- NOTE(review): confirm against ``Work``. Whether
    already-visited URLs are filtered out is decided by ``Work``/``Page``,
    not by this class.
    """

    def __init__(self, origin, *, max_workers=30, idle_timeout=30,
                 request_timeout=30):
        """Create a crawler and queue ``origin`` as the first URL to fetch.

        Args:
            origin: Start URL; also handed to every ``Page`` that is built.
            max_workers: Size of the thread pool used by :meth:`crawl`.
            idle_timeout: Seconds :meth:`crawl` waits for a new URL before
                it decides the crawl is finished.
            request_timeout: Timeout in seconds passed to ``requests.get``
                so that a stalled server cannot block a worker forever.
        """
        self._origin = origin
        self._max_workers = max_workers
        self._idle_timeout = idle_timeout
        self._request_timeout = request_timeout
        self._work = Work()
        self._work.put(origin)

    def _scrape(self, url):
        """Fetch ``url``, queue every link found on it and print the result.

        Exceptions (network errors, timeouts, parsing problems) propagate to
        the caller; when run through :meth:`crawl` they are reported by
        :meth:`_report_failure`.
        """
        response = requests.get(url, timeout=self._request_timeout)
        page = Page(self._origin, response)
        # Materialize once: the links are both queued and printed below.
        links = list(page.links)
        for link in links:
            self._work.put(link)
        print(page.url)
        print(*links, sep='\n- ')

    def _report_failure(self, url, future):
        """Print the exception of a finished scrape, if it raised one.

        Without this, an exception raised inside a pool worker would stay
        hidden in its ``Future`` and never be seen.
        """
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            print(f'Failed to scrape {url}: {error!r}')

    def crawl(self):
        """Scrape queued URLs until no new URL shows up within the idle timeout.

        The loop ends when ``get`` raises ``Empty``. Leaving the ``with``
        block then waits for scrapes that are still running; links they
        queue after that point are not fetched any more.
        """
        with ThreadPoolExecutor(max_workers=self._max_workers) as executor:
            while True:
                try:
                    url = self._work.get(timeout=self._idle_timeout)
                    future = executor.submit(self._scrape, url)
                    # Bind ``url`` as a default so each callback reports the
                    # URL of its own task, not the loop's latest value.
                    future.add_done_callback(
                        lambda done, url=url: self._report_failure(url, done)
                    )
                except Empty:
                    return
                except Exception as e:
                    # Best effort: report scheduling problems and keep going.
                    print(e)