Esempio n. 1
0
 def start(self):
     """Run the crawl loop.

     Every URL in ``CRAWLER_INITIAL_URLS`` is converted with
     ``resource_from_url`` and enqueued. Resources are then dequeued one at
     a time, and each link returned by ``download_and_get_links`` is handed
     to ``self.visit``. The loop ends when the queue is empty or
     ``self.limits_reached()`` returns true.
     """
     # Seed the queue with the configured starting points.
     for seed_url in CRAWLER_INITIAL_URLS:
         seed = resource_from_url(seed_url)
         self.enqueue(seed)

     # The queue check comes first so limits_reached() is only consulted
     # while there is still work pending.
     while not (self.q.empty() or self.limits_reached()):
         current = self.dequeue()
         discovered = download_and_get_links(current)
         for found in discovered:
             self.visit(found)
Esempio n. 2
0
 def visit(self, url_or_resource):
     """Enqueue a resource for crawling unless it is missing or already seen.

     Args:
         url_or_resource: Either a ``Resource`` instance, used as-is, or
             any other value, which is converted with ``resource_from_url``.

     Returns:
         None. A new resource is passed to ``self.enqueue`` as a side effect.
     """
     if isinstance(url_or_resource, Resource):
         resource = url_or_resource
     else:
         resource = resource_from_url(url_or_resource)

     # Bail out before touching ``resource.url``: logging first would raise
     # AttributeError on a None resource, making the None guard unreachable.
     if resource is None:
         return

     logging.info('visiting {}'.format(resource.url))
     if self.is_seen(resource):
         return
     self.enqueue(resource)