Example #1
0
    def process(self, url, ishub):
        """Download ``url``, record its status, then handle the page.

        The status returned by ``fn.downloader`` is stored in ``self.urlpool``
        for ``url`` and, when the downloader reports a different final URL,
        for that URL as well. Nothing further happens unless the status is 200.

        Args:
            url: Address handed to ``fn.downloader``.
            ishub: When truthy, links are extracted from the page, passed
                through ``self.filter_good`` and added to the pool; otherwise
                the page is handed to ``self.save_to_db``.
        """
        code, page, final_url = fn.downloader(url)

        # Record the outcome under the requested URL and, if the downloader
        # ended up somewhere else, under that final URL too.
        self.urlpool.set_status(url, code)
        if final_url != url:
            self.urlpool.set_status(final_url, code)

        if code != 200:
            return

        if not ishub:
            self.save_to_db(final_url, page)
            return

        found = fn.extract_links_re(final_url, page)
        kept = self.filter_good(found)
        print("%s/%s, goodlinks/newlinks" % (len(kept), len(found)))
        self.urlpool.addmany(kept)
Example #2
0
 async def process(self, url, ishub):
     """Fetch ``url``, record its status, then handle the page.

     The status returned by ``fn.fetch`` is stored in ``self.urlpool`` for
     ``url`` and, when a different final URL is reported, for that URL too.
     Only a 200 response is processed further.

     ``self._workers`` is decremented exactly once on every exit path
     (success, non-200 early return, or an exception), so the counter
     cannot drift upward when a request fails.
     NOTE(review): presumably the caller increments ``_workers`` when it
     schedules this coroutine -- confirm against the scheduling loop.

     Args:
         url: Address passed to ``fn.fetch`` together with ``self.session``.
         ishub: When truthy, links are extracted from the page, passed
             through ``self.filter_good`` and added to the pool; otherwise
             the page is awaited into ``self.save_to_db``.
     """
     try:
         status, html, redirected_url = await fn.fetch(self.session, url)
         self.urlpool.set_status(url, status)
         if redirected_url != url:
             self.urlpool.set_status(redirected_url, status)
         if status != 200:
             return
         if ishub:
             newlinks = fn.extract_links_re(redirected_url, html)
             goodlinks = self.filter_good(newlinks)
             print("%s/%s, goodlinks/newlinks - %s" % (len(goodlinks), len(newlinks), url))
             self.urlpool.addmany(goodlinks)
         else:
             await self.save_to_db(redirected_url, html)
     finally:
         # Previously the decrement sat after the early return, so every
         # non-200 response (or raised error) left the count one too high.
         self._workers -= 1