Beispiel #1
0
    async def set_result(self, request: Request, response: Response, task_request: Request):
        """
        Record the outcome of a processed request.

        The request is first removed from the in-progress map. A response with
        ``ok == 1`` is a success and nothing else happens. A response with
        ``ok == -1`` stores ``response.status`` in the failure map and is not
        re-queued. Any other outcome increments the request's failure counter
        and re-queues the request through ``self.add``.

        @param request: request whose serialized form keys the pending/failure maps
        @param response: response whose ``ok`` flag selects the outcome
        @param task_request: not used by this implementation
        @return: True on success, False when ``ok == -1``, otherwise None
        """
        key = request.serialize(self.module)

        # Whatever the outcome, the request is no longer in progress.
        self.pending.pop(key, None)

        outcome = response.ok
        if outcome == 1:
            return True

        if outcome == -1:
            # Keep the response status instead of a retry count; no retry here.
            self.failure[key] = response.status
            return False

        # Ordinary failure: count it (starting from 1) and queue the request again.
        self.failure[key] = self.failure.get(key, 0) + 1
        await self.add(request)
Beispiel #2
0
    async def check_pending_task(self):
        """
        Move timed-out pending requests back to the waiting queue.

        The check runs only when more than 10 seconds have passed since the
        previous check. Every entry of the pending hash whose stored timestamp
        is older than ``self._setting["PENDING_THRESHOLD"]`` is re-added to the
        waiting sorted set, scored by its request priority, and deleted from
        the pending hash. Both writes go through a single pipeline.
        """
        checked_at = time.time()
        if checked_at - self._last_check_pending_task_time <= 10:
            # Throttled: the previous check was recent enough.
            return

        self._last_check_pending_task_time = checked_at
        # The clock is read again for the age comparison below.
        now_time = time.time()

        with await self.pool as conn:
            pending_list = await conn.hgetall(self._pending_key)

            # Keys whose pending timestamp is older than the configured threshold.
            expired_keys = [
                key
                for key, stamp in pending_list.items()
                if now_time - int(stamp) > self._setting["PENDING_THRESHOLD"]
            ]

            # Flat [score, member, score, member, ...] argument list for zadd.
            score_member_args = []
            for key in expired_keys:
                expired_request = Request.unserialize(key, self.module)
                score_member_args.extend([expired_request.priority, key])

            if not score_member_args:
                return

            pipe = conn.pipeline()
            pipe.zadd(self._waiting_key, *score_member_args)
            pipe.hdel(self._pending_key, *expired_keys)
            result = await pipe.execute()

            logger.info(f"pendings: {len(pending_list)}, del_pending: {result[1]}, add_waitings: {result[0]}")
Beispiel #3
0
 async def get(self, priority):
     """
     Take one request from the waiting queue.

     The dequeued entry's second element is the serialized request. It is
     recorded in ``self.pending`` with the current timestamp and then
     deserialized.

     @param priority: not used by this implementation
     @return: the deserialized Request, or None when the waiting queue is empty
     """
     if self.waiting.empty():
         return None

     entry = await self.waiting.get()
     serialized = entry[1]
     # Mark the request as in progress from now on.
     self.pending[serialized] = get_timestamp()
     return Request.unserialize(serialized, self.module)
Beispiel #4
0
    async def get(self, priority):
        """
        Fetch one request from the message queue.

        @param priority: not used by this implementation
        @return: the deserialized Request with the originating message attached
                 as ``request.message``, or None when no message was received
        """
        # NOTE(review): the argument 2 is presumably a timeout or count - confirm
        # against the pool's subscribe() implementation.
        message = await self.pool.subscribe(2)
        if not message:
            return None

        request = Request.unserialize(message.body.decode(), self.module)
        # Keep the raw message on the request for later use by the caller.
        request.message = message
        return request
Beispiel #5
0
 async def _process_task(self, request: Request, task_id):
     """
     Run one request through the handle / process / record-result pipeline.

     Any exception raised along the way is logged with a traceback and
     swallowed, so it does not propagate to the caller.

     @param request: the request object to process
     @param task_id: not used by this implementation
     """
     try:
         # Work on a copy produced by replace(); the original request is what
         # gets passed to process_response and to the scheduler.
         working_copy = request.replace()
         response = await self.handle_request(working_copy)
         await self.process_response(request, response)
         await self.scheduler.set_result(request, response, working_copy)
     except Exception:
         debug_msg = traceback.format_exc(self.logging.get_tb_limit())
         logger.error(f"{request} callback error \n{debug_msg}")
Beispiel #6
0
 async def _process_start_urls(self):
     """
     Build a Request for every entry in ``self.start_urls`` and queue them.

     Each request uses ``self.parse`` as its callback. The scheduler's return
     value is logged as the number of requests set; when there are no start
     urls the scheduler is not called and 0 is logged. Any exception is logged
     with a traceback and swallowed.
     """
     try:
         request_list = []
         for url in self.start_urls:
             request_list.append(Request(url=url, callback=self.parse))

         # Skip the scheduler call entirely when there is nothing to add.
         counts = await self.scheduler.add(request_list) if request_list else 0
         logger.info(f"init start urls end, set {counts}")
     except Exception:
         # Initialising the start urls failed.
         debug_msg = traceback.format_exc(self.logging.get_tb_limit())
         logger.error(f"init start urls error \n{debug_msg}")
Beispiel #7
0
    async def get(self, priority: typing.Optional[typing.Union[int, list]]):
        """
        Pop one request from the Redis waiting queue and mark it as pending.

        For each (min, max) score range derived from ``priority``, a Lua script
        atomically takes the highest-scored member of the waiting sorted set
        within that range, removes it, and stores it in the pending hash with
        the Redis server time (seconds) as its value. The first range that
        yields a member wins.

        @param priority: None to consider every priority, an int for exactly
                         that priority, or a list that is converted to score
                         ranges by ``get_priority_list``
        @return: the deserialized Request, or None when no request is available
                 or when an error occurred (the error is logged, not raised)
        """
        if priority is None:
            priority_list = [("-inf", "+inf")]
        elif isinstance(priority, int):
            priority_list = [(priority, priority)]
        else:
            priority_list = get_priority_list(priority)

        # Only real key names are passed in KEYS; the score bounds travel in
        # ARGV. Passing non-key values as KEYS breaks the Redis scripting
        # contract and can fail with cross-slot errors on Redis Cluster.
        lua = """
            redis.replicate_commands()
            local waiting_key = KEYS[1]
            local pending_key = KEYS[2]
            local min = ARGV[1]
            local max = ARGV[2]

            -- fetch the single highest-scored member within [min, max]
            local result = redis.call('zrevrangebyscore', waiting_key, max, min, 'LIMIT', 0, 1)

            if result and #result > 0 then
                redis.call('zrem', waiting_key, result[1])
                redis.call('hset', pending_key, result[1], redis.call('TIME')[1])
                return result[1]
            end
            return nil
        """

        try:
            with await self.pool as conn:
                for _min, _max in priority_list:
                    eval_result = await conn.eval(
                        lua,
                        keys=[self._waiting_key, self._pending_key],
                        args=[_min, _max],
                    )
                    if eval_result:
                        self.task_count += 1
                        return Request.unserialize(eval_result, self.module)
        except Exception:
            # Best effort: a failed fetch is logged and reported as "no request".
            logger.error(f"get request error \n{traceback.format_exc()}")

        return None
Beispiel #8
0
    async def set_result(self, request: Request, response: Response, task_request: Request):
        """
        Persist the outcome of a request and update the success/failure counters.

        On success (``response.ok == 1``) the request is removed from the
        pending hash. Otherwise it is removed from the pending hash and stored
        in the failure hash together with the serialized task request and
        response, both in one pipeline.

        @param request: request whose serialized form is the hash field name
        @param response: response whose ``ok`` flag decides success or failure
        @param task_request: request serialized alongside the response on failure
        @return: None
        """
        field = request.serialize(self.module)

        with await self.pool as conn:
            if response.ok != 1:
                failure_payload = serialize_request_and_response(task_request, response)
                # Failure: remove from the pending hash and record it in the
                # failure hash.
                pipe = conn.pipeline()
                pipe.hdel(self._pending_key, field)
                pipe.hset(self._failure_key, field, failure_payload)
                await pipe.execute()
                self.task_failure += 1
                return

            # Success: the request only needs to leave the pending hash.
            await conn.hdel(self._pending_key, field)
            self.task_success += 1
Beispiel #9
0
 async def start_requests(self):
     """
     Yield the initial requests of the crawl.

     By default one Request is produced per entry in ``self.start_urls``, with
     ``self.parse`` as its callback. Can be overridden (per the original note).
     """
     for start_url in self.start_urls:
         yield Request(url=start_url, callback=self.parse)