def _failed(self, filename):
    """
    Re-fetch the links that failed in a previous run.
    :param filename: file holding one failed URL per line
    :return:
    """
    urls = FileUtil.readfilelist(filename)
    for i, failed_url in enumerate(urls):
        # Download and parse the failed link again, then queue the result.
        html = self.downloader.download(failed_url)
        datas = self.parser.parse(failed_url, html)
        self.logger.info("the spider system has fetched %s failed links" % str(i + 1))
        self.output.add_data(datas)
def _failed(self, filename):
    """
    Re-fetch the links that failed in a previous run.
    :param filename: file holding one failed entry per line, formatted as "url<TAB>title"
    :return:
    """
    content = FileUtil.readfilelist(filename)
    for i, item in enumerate(content):
        url, title = item.split('\t')
        try:
            self._download(url, title)
            self.logger.info(
                "the spider system has crawled %s failed links" % str(i + 1))
        except Exception as e:
            self.logger.info('fetching the failed contents still failed: %s' % str(e))
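# A minimal sketch of what FileUtil.readfilelist is assumed to do here: return
# the file's non-empty lines with trailing newlines stripped. For the variant
# above, each line must then hold a URL and a title separated by a single tab,
# e.g. "http://example.com/page\tPage title". The name _readfilelist_sketch and
# this stub are assumptions for illustration, not the project's actual helper.
def _readfilelist_sketch(filename):
    with open(filename, 'r') as f:
        return [line.rstrip('\n') for line in f if line.strip()]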