async def _process_start_urls(self):
    """Push the spider's initial requests into the scheduler.

    The requests are taken from ``self.start_requests()`` so that a
    subclass overriding that hook has its requests honoured here; the
    default hook yields one ``Request`` per entry of ``self.start_urls``.

    The value returned by ``self.scheduler.add`` is logged as the number
    of requests that were set (0 when there was nothing to add).

    Any exception raised while seeding is logged with a traceback and
    swallowed; nothing is re-raised to the caller.
    """
    try:
        initial = self.start_requests()
        if hasattr(initial, "__aiter__"):
            # Default case: start_requests is an async generator.
            request_list = [request async for request in initial]
        else:
            # Tolerate an override written as a plain (sync) iterable.
            request_list = list(initial)

        counts = 0
        if request_list:
            counts = await self.scheduler.add(request_list)
        logger.info(f"init start urls end, set {counts}")
    except Exception:
        # Initialising the start requests failed: log the traceback,
        # truncated to the limit supplied by self.logging, and carry on.
        debug_msg = traceback.format_exc(self.logging.get_tb_limit())
        logger.error(f"init start urls error \n{debug_msg}")
async def start_requests(self):
    """Yield the initial requests for this spider.

    By default one ``Request`` is produced for every entry in
    ``self.start_urls``, each using ``self.parse`` as its callback.
    Subclasses may override this method to supply different requests.
    """
    for start_url in self.start_urls:
        initial_request = Request(url=start_url, callback=self.parse)
        yield initial_request