def spider_end(self):
    """Finalize a crawl round: run end hooks, release resources, and report timing.

    Behavior depends on ``self._keep_alive``:
      * keep-alive  -> parsers stay open, metrics are flushed, spider waits
                       for the next round.
      * one-shot    -> parsers and the webdriver pool are closed, metrics are
                       shut down, and the status table is deleted.
    """
    self.record_end_time()

    # Optional user-supplied end hook.
    if self._end_callback:
        self._end_callback()

    # end_callback always fires; close() only when the spider will not run again.
    for parser in self._parsers:
        if not self._keep_alive:
            parser.close()
        parser.end_callback()

    if not self._keep_alive:
        # Release the shared webdriver pool, if any was created.
        if Request.webdriver_pool:
            Request.webdriver_pool.close()

        # Shut down the metrics (instrumentation) system.
        metrics.close()
    else:
        # Spider keeps running: just push out buffered metric points.
        metrics.flush()

    # Compute total crawl duration from the start timestamp stored in redis
    # (is_pop=True removes it so the next round starts fresh).
    raw_start = self._redisdb.hget(
        self._tab_spider_time, SPIDER_START_TIME_KEY, is_pop=True
    )
    if raw_start:
        started_at = int(raw_start)
        elapsed = tools.get_current_timestamp() - started_at
        summary = "《%s》爬虫结束,耗时 %s" % (
            self._spider_name,
            tools.format_seconds(elapsed),
        )
        log.info(summary)
        self.send_msg(summary)

    if self._keep_alive:
        log.info("爬虫不自动结束, 等待下一轮任务...")
    else:
        self.delete_tables(self._tab_spider_status)
def run(self):
    """Main entry point: start parser-control threads, distribute tasks,
    then poll until all work is done and shut everything down.
    """
    self.start_callback()

    # Spin up one parser-control worker per configured thread.
    for _ in range(self._thread_count):
        control = AirSpiderParserControl(self._memory_db, self._item_buffer)
        control.add_parser(self)
        control.start()
        self._parser_controls.append(control)

    self._item_buffer.start()

    self.distribute_task()

    while True:
        try:
            if self.all_thread_is_done():
                # Stop every parser-control worker.
                for control in self._parser_controls:
                    control.stop()

                # Shut down the item buffer.
                self._item_buffer.stop()

                # Release the webdriver pool, if one was created.
                if Request.webdriver_pool:
                    Request.webdriver_pool.close()

                log.info("无任务,爬虫结束")
                break
        except Exception as e:
            log.exception(e)

        tools.delay_time(1)  # poll spider status once per second

    self.end_callback()
    # Clear the started flag so this thread object can be start()ed again.
    self._started.clear()
    # Shut down the metrics (instrumentation) system.
    metrics.close()
from feapder.utils import metrics # 初始化打点系统 metrics.init() metrics.emit_counter("key", count=1, classify="test") metrics.close()