Beispiel #1
0
 def parse_index(self, response):
     """
     Parse an index (result list) page.

     Yields one request per link matched by the result-list selector, each
     handled by ``self.parse_detail``, and then, when the page carries a
     ``#sogou_next`` link, one proxied request for the following index page
     handled by this method again.

     :param response: response object whose ``text`` holds the page HTML
     :return: generator of WeixinRequest objects
     """
     page = pq(response.text)
     for anchor in page('.news-box .news-list li .txt-box h3 a').items():
         yield WeixinRequest(url=anchor.attr('href'), callback=self.parse_detail)

     next_href = page('#sogou_next').attr('href')
     if not next_href:
         # No pagination link: this was the last index page.
         return
     yield WeixinRequest(
         url=self.base_url + str(next_href),
         callback=self.parse_index,
         need_proxy=True,
     )
Beispiel #2
0
 def start(self):
     """
     Queue the first request of the crawl.

     Copies ``self.headers`` into the session's headers, builds the start URL
     from ``self.base_url`` plus the ``query``/``type`` parameters, and adds
     a proxied request for it (parsed by ``self.parse_index``) to the queue.
     """
     self.session.headers.update(self.headers)
     query_string = urlencode({'query': self.keyword, 'type': 2})
     first_request = WeixinRequest(
         url=self.base_url + '?' + query_string,
         callback=self.parse_index,
         need_proxy=True,
     )
     self.queue.add(first_request)
Beispiel #3
0
 def start(self):
     """
     Initialise the crawl by scheduling the first index-page request.

     Copies ``self.headers`` into the session's headers, builds the start URL
     from ``self.base_url`` and the search parameters below, and adds a
     proxied request for it (parsed by ``self.parse_index``) to the queue.
     """
     # Update the session-wide headers so every request sent through it uses them.
     # NOTE: the cookie refresh via self.update_cookie() is currently disabled.
     self.session.headers.update(self.headers)
     # Values are written in their decoded form because urlencode()
     # percent-encodes them itself; a pre-encoded value such as '1%2C...'
     # would go out double-encoded as '1%252C...'.
     params = {
         'query': self.keyword,
         'type': 2,
         'sut': 7956,
         'lkt': '1,1553052272863,1553052272863',
         's_from': 'input',
         '_sug_': 'y',
         'sst0': '1553052272967',
         'ie': 'utf8',
         'w': '01019900',
         'dr': '1',
     }
     start_url = self.base_url + '?' + urlencode(params)
     weixin_request = WeixinRequest(url=start_url, callback=self.parse_index, need_proxy=True)
     # Schedule the first request.
     self.queue.add(weixin_request)
 def parse_index(self, response):
     """
     Parse an index (result list) page.

     Produces a detail-page request for every link matched on the page and,
     if a ``#sogou_next`` link exists, a proxied request for the next index
     page that is routed back to this method.

     :param response: response object whose ``text`` holds the page HTML
     :return: generator of WeixinRequest objects
     """
     document = pq(response.text)
     # Each matched anchor points at a detail page.
     detail_links = document('.news-box .news-list li .txt-box h3 a').items()
     for link in detail_links:
         detail_url = link.attr('href')
         yield WeixinRequest(url=detail_url, callback=self.parse_detail)

     # Follow the pagination link to the next list page, when there is one.
     next_page = document('#sogou_next').attr('href')
     if not next_page:
         return
     next_url = self.base_url + str(next_page)
     yield WeixinRequest(url=next_url, callback=self.parse_index, need_proxy=True)
Beispiel #5
0
 def start(self):
     """
     Initialise the crawl by scheduling the first index-page request.

     The request is built without a proxy and with a 15 second timeout, and
     its response is handed to ``self.parse_index``.
     """
     # Apply the configured headers to the whole session.
     self.session.headers.update(self.headers)
     search_params = {'query': self.keyword, 'type': 2}
     first_request = WeixinRequest(
         url=self.base_url + '?' + urlencode(search_params),
         callback=self.parse_index,
         need_proxy=False,
         timeout=15,
     )
     # Schedule the first request.
     self.queue.add(first_request)
 def start(self):
     """
     Initialise the crawl by scheduling the first index-page request.

     Copies ``self.headers`` into the session's headers, builds the start URL
     from ``self.base_url`` plus the ``query``/``type`` parameters, and adds
     a proxied request for it (parsed by ``self.parse_index``) to the queue.
     """
     # Apply the configured headers to the whole session.
     self.session.headers.update(self.headers)
     # Encode the search parameters into a query string.
     query_string = parse.urlencode({'query': self.keyword, 'type': 2})
     # Build the request object and put it on the queue.
     first_request = WeixinRequest(
         url=self.base_url + '?' + query_string,
         callback=self.parse_index,
         need_proxy=True,
     )
     self.queue.add(first_request)
 def start(self):
     """
     Initialise the crawl by scheduling the first index-page request.

     Copies ``self.headers`` into the session's headers, builds the start URL
     from ``self.base_url`` plus the ``query``/``type`` parameters, and adds
     a proxied request for it to the queue.
     """
     # Session-wide header update, so every request sent through the session
     # carries the configured headers.
     self.session.headers.update(self.headers)
     # Assemble the start URL.
     search_params = {'query': self.keyword, 'type': 2}
     entry_url = self.base_url + '?' + urlencode(search_params)
     # parse_index() handles the response; need_proxy=True marks the request
     # as one that must go through a proxy.
     seed_request = WeixinRequest(url=entry_url, callback=self.parse_index, need_proxy=True)
     # Enqueue it as the first scheduled request.
     self.queue.add(seed_request)
Beispiel #8
0
        :return: 添加结果
        """
        if isinstance(request, WeixinRequest):
            return self.db.rpush(REDIS_KEY, dumps(request))
        return False

    def pop(self):
        """
        Take the next request off the head of the queue and deserialize it.

        A single LPOP is issued rather than an LLEN check followed by LPOP.
        The two-step form costs an extra round trip and is not atomic: if
        another consumer empties the list between the two calls, None would
        be handed to ``loads``.

        :return: the deserialized request, or False when the queue is empty
        """
        # Assumes self.db is a redis-py style client whose lpop() returns
        # None for an empty or missing list - TODO confirm.
        payload = self.db.lpop(REDIS_KEY)
        if payload is None:
            return False
        # NOTE(review): if ``loads`` is pickle.loads, make sure only trusted
        # writers can push to this key; unpickling untrusted data is unsafe.
        return loads(payload)

    def clear(self):
        """
        Delete the queue's key from the backing store, dropping every
        queued request.
        """
        store = self.db
        store.delete(REDIS_KEY)

    def empty(self):
        """
        Report whether the queue currently holds no requests.

        :return: True when the stored list has length 0, otherwise False
        """
        pending = self.db.llen(REDIS_KEY)
        return pending == 0

if __name__ == '__main__':
    # Manual round-trip check: push one request, pop it back and show that
    # its attributes survived serialization.
    queue = RedisQueue()
    demo_request = WeixinRequest(
        url='http://www.baidu.com',
        callback='hello',
        need_proxy=True,
    )
    queue.add(demo_request)
    restored = queue.pop()
    print(restored)
    print(restored.callback, restored.need_proxy)