Code example #1
File: scraper.py  Project: busla/Reynir
 def _scrape_single_article(self, d):
     """ Single article scraper that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.scrape_article(d.url, helper)
     except Exception as e:
         print("Exception when scraping article at {0}: {1!r}".format(d.url, e))
Code example #2
File: scraper.py  Project: busla/Reynir
 def _parse_single_article(self, d):
     """ Single article parser that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.parse_article(d.url, helper)
             # Save the unknown verbs accumulated during parsing, if any
             UnknownVerbs.write()
     except Exception as e:
         print("Exception when parsing article at {0}: {1!r}".format(d.url, e))
Code example #3
 def _scrape_single_article(self, d):
     """ Single article scraper that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.scrape_article(d.url, helper)
     except Exception as e:
         logging.warning(
             "[{2}] Exception when scraping article at {0}: {1!r}".format(
                 d.url, e, d.seq))
         if Settings.DEBUG:
             traceback.print_stack()
Code example #4
File: scraper.py  Project: vthorsteinsson/Reynir
 def _scrape_single_article(self, d):
     """ Single article scraper that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.scrape_article(d.url, helper)
     except Exception as e:
         logging.warning(
             "[{2}] Exception when scraping article at {0}: {1!r}".format(
                 d.url, e, d.seq
             )
         )
Code example #5
File: scraper.py  Project: busla/Reynir
 def _scrape_single_root(self, r):
     """ Single root scraper that will be called by a process within a
         multiprocessing pool """
     if r.domain.endswith(".local"):
         # We do not scrape .local roots
         return
     try:
         print("Scraping root of {0} at {1}...".format(r.description, r.url))
         # Process a single top-level domain and root URL,
         # parsing child URLs that have not been seen before
         helper = Fetcher._get_helper(r)
         if helper:
             self.scrape_root(r, helper)
     except Exception as e:
         print("Exception when scraping root at {0}: {1!r}".format(r.url, e))
Code example #6
File: scraper.py  Project: vthorsteinsson/Reynir
 def _scrape_single_root(self, r):
     """ Single root scraper that will be called by a process within a
         multiprocessing pool """
     if r.domain.endswith(".local"):
         # We do not scrape .local roots
         return
     try:
         logging.info("Scraping root of {0} at {1}...".format(r.description, r.url))
         # Process a single top-level domain and root URL,
         # parsing child URLs that have not been seen before
         helper = Fetcher._get_helper(r)
         if helper:
             self.scrape_root(r, helper)
     except Exception as e:
         logging.warning(
             "Exception when scraping root at {0}: {1!r}".format(r.url, e)
         )
Code example #7
 def _parse_single_article(self, d):
     """ Single article parser that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.parse_article(d.seq, d.url, helper)
     except KeyboardInterrupt:
         logging.info("KeyboardInterrupt in _parse_single_article()")
         sys.exit(1)
     except MemoryError:
         # Nothing to do but give up on this process
         sys.exit(1)
     except Exception as e:
         logging.warning(
             "[{2}] Exception when parsing article at {0}: {1!r}".format(
                 d.url, e, d.seq))
         # traceback.print_exc()
         # raise
     return True
Code example #8
File: scraper.py  Project: vthorsteinsson/Reynir
 def _parse_single_article(self, d):
     """ Single article parser that will be called by a process within a
         multiprocessing pool """
     try:
         helper = Fetcher._get_helper(d.root)
         if helper:
             self.parse_article(d.seq, d.url, helper)
     except KeyboardInterrupt:
         logging.info("KeyboardInterrupt in _parse_single_article()")
         sys.exit(1)
     except MemoryError:
         # Nothing to do but give up on this process
         sys.exit(1)
     except Exception as e:
         logging.warning(
             "[{2}] Exception when parsing article at {0}: {1!r}".format(
                 d.url, e, d.seq
             )
         )
         # traceback.print_exc()
         # raise
     return True
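
Usage note: every docstring above states that these single-item workers are called by a process within a multiprocessing pool. The snippets themselves do not show the driver side, so here is a minimal sketch of how such a fan-out could look. The function name scrape_in_parallel, the articles iterable, and the processes parameter are assumptions for illustration only and are not taken from the Reynir source.

 from multiprocessing import Pool

 def scrape_in_parallel(scraper, articles, processes=4):
     """ Hypothetical driver sketch: hand each article descriptor in
         'articles' to scraper._scrape_single_article() in a pool worker.
         The scraper instance (and its bound method) must be picklable
         if the pool uses the 'spawn' start method. """
     with Pool(processes=processes) as pool:
         # imap_unordered streams the descriptors to the worker processes;
         # each article is scraped independently, so completion order
         # does not matter and results are simply drained
         for _ in pool.imap_unordered(scraper._scrape_single_article, articles):
             pass

Because each worker catches its own exceptions (printing or logging a warning instead of re-raising), one failing article does not take down the pool; the KeyboardInterrupt and MemoryError handlers in examples #7 and #8 additionally let a worker process exit cleanly when it cannot continue.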