def _worker(self):
    logger.info(f"{self.name} start!")
    while self.is_running():
        try:
            task = self.save_queue.get_nowait()
        except queue.Empty:
            self.thread_event.wait(1)
            continue

        feed_id = task.get("feed_task").get("id")
        title: str = task.get("title")
        content: str = task.get("content")
        link = task.get("link")
        unique_hash = "{}|{}".format(
            hashlib.md5(title.encode()).hexdigest(),
            hashlib.md5(content.encode()).hexdigest()
        )
        tags = task.get("tags")
        publish_time = task.get("publish_time")

        # Use the unique_hash to decide whether this paper already exists.
        # TODO handle the case where an existing article has been updated
        if PaperModel.instance.get_paper_by_unique_hash(unique_hash):
            continue
        else:
            paper = PaperModel(feed_id=feed_id,
                               title=title,
                               content=content,
                               link=link,
                               unique_hash=unique_hash,
                               tags=tags,
                               pushed_status=0,
                               publish_time=publish_time)
            paper.save()
            logger.debug(f"{self.name} paper saved, title: {title}, unique_hash: {unique_hash}")
    logger.info(f"{self.name} stop!")
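# The worker above leans on a few helpers -- is_running(), thread_event,
# self.name, self.save_queue -- that live on a shared engine base class not
# shown in this excerpt. Below is a minimal sketch of what such a base class
# might look like; the BaseThreadEngine name and the _running flag are
# assumptions, only the is_running()/thread_event/name usage comes from the
# code above.
import logging
import threading

logger = logging.getLogger(__name__)


class BaseThreadEngine:
    def __init__(self, name: str):
        self.name = name
        self._running = False
        # Doubles as an interruptible sleep: wait(timeout) returns early
        # as soon as the event is set during shutdown.
        self.thread_event = threading.Event()
        self._thread = None

    def is_running(self) -> bool:
        return self._running

    def start(self):
        self._running = True
        self._thread = threading.Thread(target=self._worker, name=self.name, daemon=True)
        self._thread.start()

    def stop(self):
        # Flip the flag first, then wake any worker blocked in thread_event.wait().
        self._running = False
        self.thread_event.set()
        if self._thread is not None:
            self._thread.join()

    def _worker(self):
        raise NotImplementedError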
def _worker(self):
    logger.info(f"{self.name} start!")
    task_queue = g.queue_context.feed_task_queue
    while self.is_running():
        # Find all feed sources in the db that are due for a refresh.
        row: FeedModel
        for row in FeedModel.instance.all():
            logger.debug(
                f"name: {row.name}, last_refresh_time: {row.last_refresh_time}"
            )
            if row.last_refresh_time + datetime.timedelta(
                    minutes=row.interval) > datetime.datetime.now():
                continue

            # add to queue
            task = {
                "id": row.id,
                "name": row.name,
                "feed_url": row.feed_url,
            }
            self.put_to_queue(task_queue, task)
            logger.debug(f"put task {task} to queue.")

            # update last_refresh_time
            row.last_refresh_time = datetime.datetime.now()
            row.status = FeedStatus.UPDATING
            row.save()

        # wait for next wakeup
        self.thread_event.wait(self.refresh_interval)
    logger.info(f"{self.name} stop!")
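# put_to_queue() is called above but not defined in this excerpt. It presumably
# mirrors the retry loop the fetcher uses for its parser queue; a hedged sketch,
# assuming a bounded queue.Queue and the same is_running()/thread_event helpers:
import queue


def put_to_queue(self, q: queue.Queue, task: dict) -> bool:
    """Put a task onto a bounded queue, retrying while the engine is running."""
    while self.is_running():
        try:
            q.put_nowait(task)
            return True
        except queue.Full:
            logger.warning(f"{self.name} target queue full, retry..")
            # Interruptible back-off: returns immediately once stop() sets the event.
            self.thread_event.wait(1)
    return False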
async def _worker(self, idx):
    self._init_event()
    cur_name = f"{self.name}-{idx}"
    logger.info(f"{cur_name} start.")
    while self.is_running():
        try:
            task = self.feed_task_queue.get_nowait()
            logger.debug(f"get task: {task}")

            # Read the feed_url from the task.
            feed_url = task.get("feed_url")
            if not feed_url:
                logger.warning(f"{cur_name} empty feed_url!")
                await self._wait(1)
                continue

            # Make the request.
            async with aiohttp.request(
                    "GET", feed_url, timeout=self.client_timeout) as resp:
                if resp.status != 200:
                    logger.warning(
                        f"{cur_name} Error while making request to {feed_url}, status code: {resp.status}"
                    )
                    # TODO mark the feed's status as dead
                    await self._wait(1)
                    continue

                # Status code is 200: hand the body off to the parser queue.
                content = await resp.text()
                parser_task = {
                    "feed_task": task,
                    "content": content,
                }
                while self.is_running():
                    try:
                        self.parser_task_queue.put_nowait(parser_task)
                    except queue.Full:
                        logger.warning(
                            f"{cur_name} parser task queue full, retry..")
                        await self._wait(1)
                    else:
                        break
                logger.debug(f"{cur_name} content size: {len(content)}")
        except queue.Empty:
            await self._wait(1)
            continue
    logger.info(f"{cur_name} stop.")
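# The async fetcher above is parametrized by idx, so several copies presumably
# run on one event loop, all polling the same thread-safe feed_task_queue with
# get_nowait() and backing off via self._wait(). A sketch of what _wait() might
# wrap and how the coroutines might be gathered; the run() method and the
# worker_count parameter are illustrative, not taken from the original code.
import asyncio


async def _wait(self, seconds: float):
    # Likely a thin wrapper around asyncio.sleep so sibling coroutines keep
    # making progress while this one backs off.
    await asyncio.sleep(seconds)


def run(self, worker_count: int = 4):
    async def _main():
        # Start worker_count fetcher coroutines and wait until all of them
        # exit their `while self.is_running()` loops.
        await asyncio.gather(*(self._worker(idx) for idx in range(worker_count)))

    asyncio.run(_main())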
def start(self):
    signal.signal(signal.SIGINT, self.__sigint)
    logger.info("exit process register done.")
    self.init_queues()
    logger.info("Init queues done.")
    self.init_engines()
    logger.info("Init engines done.")
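# init_queues() and init_engines() are not shown in this excerpt. A sketch of
# what the queue setup might look like, assuming g is the application-wide
# context object referenced by the refresh worker and that each queue is a
# bounded, thread-safe queue.Queue; the attribute names match the workers
# above, while the maxsize value is arbitrary.
import queue
from types import SimpleNamespace


def init_queues(self, maxsize: int = 1000):
    g.queue_context = SimpleNamespace(
        feed_task_queue=queue.Queue(maxsize=maxsize),    # refresh engine -> fetcher
        parser_task_queue=queue.Queue(maxsize=maxsize),  # fetcher -> parser
        save_queue=queue.Queue(maxsize=maxsize),         # parser -> save engine
    )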
def __sigint(self, sig, frame):
    logger.info("Receive exit signal.")
    self.engines.stop_engines()
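# self.engines appears to be a small manager object rather than a plain list.
# A sketch of how its stop_engines() might fan the shutdown signal out,
# assuming each engine exposes the stop() from the base-class sketch above;
# the EngineManager name is illustrative.
class EngineManager:
    def __init__(self, engines: list):
        self._engines = engines

    def stop_engines(self):
        # Flip each engine's running flag and wake its interruptible waits so
        # every `while self.is_running()` loop exits and logs its stop message.
        for engine in self._engines:
            engine.stop()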