def _scrape_single_article(self, d): """ Single article scraper that will be called by a process within a multiprocessing pool """ try: helper = Fetcher._get_helper(d.root) if helper: self.scrape_article(d.url, helper) except Exception as e: print("Exception when scraping article at {0}: {1!r}".format(d.url, e))
def _parse_single_article(self, d): """ Single article parser that will be called by a process within a multiprocessing pool """ try: helper = Fetcher._get_helper(d.root) if helper: self.parse_article(d.url, helper) # Save the unknown verbs accumulated during parsing, if any UnknownVerbs.write() except Exception as e: print("Exception when parsing article at {0}: {1!r}".format(d.url, e))
def _scrape_single_article(self, d):
    """ Single article scraper that will be called by a process within a
        multiprocessing pool.

        d is expected to carry .root (the article's root/domain descriptor),
        .url (the article URL) and .seq (a sequence number used to tag log
        lines). All exceptions are caught and logged so a bad article never
        kills the worker process. """
    try:
        helper = Fetcher._get_helper(d.root)
        if helper:
            self.scrape_article(d.url, helper)
    except Exception as e:
        logging.warning(
            "[{2}] Exception when scraping article at {0}: {1!r}".format(
                d.url, e, d.seq))
        if Settings.DEBUG:
            # Bug fix: print_exc() prints the traceback of the exception
            # currently being handled; print_stack() would only show the
            # worker's current call stack and omit where the error was raised
            traceback.print_exc()
def _scrape_single_article(self, d): """ Single article scraper that will be called by a process within a multiprocessing pool """ try: helper = Fetcher._get_helper(d.root) if helper: self.scrape_article(d.url, helper) except Exception as e: logging.warning( "[{2}] Exception when scraping article at {0}: {1!r}".format( d.url, e, d.seq ) )
def _scrape_single_root(self, r): """ Single root scraper that will be called by a process within a multiprocessing pool """ if r.domain.endswith(".local"): # We do not scrape .local roots return try: print("Scraping root of {0} at {1}...".format(r.description, r.url)) # Process a single top-level domain and root URL, # parsing child URLs that have not been seen before helper = Fetcher._get_helper(r) if helper: self.scrape_root(r, helper) except Exception as e: print("Exception when scraping root at {0}: {1!r}".format(r.url, e))
def _scrape_single_root(self, r): """ Single root scraper that will be called by a process within a multiprocessing pool """ if r.domain.endswith(".local"): # We do not scrape .local roots return try: logging.info("Scraping root of {0} at {1}...".format(r.description, r.url)) # Process a single top-level domain and root URL, # parsing child URLs that have not been seen before helper = Fetcher._get_helper(r) if helper: self.scrape_root(r, helper) except Exception as e: logging.warning( "Exception when scraping root at {0}: {1!r}".format(r.url, e) )
def _parse_single_article(self, d): """ Single article parser that will be called by a process within a multiprocessing pool """ try: helper = Fetcher._get_helper(d.root) if helper: self.parse_article(d.seq, d.url, helper) except KeyboardInterrupt: logging.info("KeyboardInterrupt in _parse_single_article()") sys.exit(1) except MemoryError: # Nothing to do but give up on this process sys.exit(1) except Exception as e: logging.warning( "[{2}] Exception when parsing article at {0}: {1!r}".format( d.url, e, d.seq)) # traceback.print_exc() # raise return True
def _parse_single_article(self, d): """ Single article parser that will be called by a process within a multiprocessing pool """ try: helper = Fetcher._get_helper(d.root) if helper: self.parse_article(d.seq, d.url, helper) except KeyboardInterrupt: logging.info("KeyboardInterrupt in _parse_single_article()") sys.exit(1) except MemoryError: # Nothing to do but give up on this process sys.exit(1) except Exception as e: logging.warning( "[{2}] Exception when parsing article at {0}: {1!r}".format( d.url, e, d.seq ) ) # traceback.print_exc() # raise return True