Ejemplo n.º 1
0
 def find_all(self):
     """Yield a Proxy for every document returned by ``self.proxies.find()``."""
     for document in self.proxies.find():
         # Remove the _id key so only the remaining fields reach Proxy.
         del document['_id']
         yield Proxy(**document)
Ejemplo n.º 2
0
    def find(self, conditions=None, count=0):
        """
        Query proxy IPs by condition.

        :param conditions: query conditions as a dict; ``None`` becomes ``{}``
        :param count: how many records to fetch (forwarded as ``limit``)
        :return: list of the first ``count`` Proxy objects ordered by score
            descending, then by speed ascending; ``count == 0`` queries all
        """
        query = {} if conditions is None else conditions

        # Run the query first, then apply the ordering to the cursor.
        unsorted = self.proxies.find(query, limit=count)
        cursor = unsorted.sort([
            ("score", pymongo.DESCENDING),
            ("speed", pymongo.ASCENDING),
        ])

        # Build one Proxy per document from its explicitly named fields.
        return [
            Proxy(
                record['ip'],
                record['port'],
                score=record['score'],
                protocol=record['protocol'],
                nick_type=record['nick_type'],
                speed=record['speed'],
                disable_domains=record['disable_domains'],
            )
            for record in cursor
        ]
Ejemplo n.º 3
0
 def find_all(self):
     """Generate a Proxy object for each item in ``self.proxies.find()``.

     Every item is unpacked into ``Proxy`` as keyword arguments after its
     ``_id`` key has been removed.
     """
     def without_id(fields):
         # Delete the _id key from the item dict and hand the dict back.
         fields.pop("_id")
         return fields

     yield from (Proxy(**without_id(fields)) for fields in self.proxies.find())
Ejemplo n.º 4
0
 def find_all(self):
     """Lazily produce one Proxy per record from ``self.proxies.find()``."""
     records = self.proxies.find()
     for record in records:
         record.pop('_id')  # strip the '_id' field before building the Proxy
         yield Proxy(**record)
Ejemplo n.º 5
0
 def find_all(self):
     """Yield Proxy objects built from the results of ``self.proxies.find()``."""
     for entry in self.proxies.find():
         # The _id key is removed; everything else becomes Proxy keyword args.
         entry.pop('_id')
         yield Proxy(**entry)
Ejemplo n.º 6
0
 def find_all(self):
     """Database query operation: yield a Proxy per item in ``self.proxies``."""
     found = self.proxies.find()
     for fields in found:
         del fields['_id']  # remove "_id" from the item
         yield Proxy(**fields)
Ejemplo n.º 7
0
 def find_all(self):
     """Query everything in ``self.proxies`` and yield it as Proxy objects."""
     def proxy_kwargs(item):
         # Items carry an _id key that is not passed on to Proxy.
         item.pop('_id')
         return item

     yield from (Proxy(**proxy_kwargs(item)) for item in self.proxies.find())
Ejemplo n.º 8
0
 def find_all(self):
     """2.4 Query all proxy IPs, yielding each one as a Proxy object."""
     for stored in self.proxies.find():
         stored.pop('_id')  # delete the _id key
         yield Proxy(**stored)
Ejemplo n.º 9
0
 def find_all(self):
     """Query all proxy data, yielding one Proxy per result."""
     results = self.proxies.find()
     for row in results:
         # Delete the `_id` key before unpacking the row into Proxy.
         del row['_id']
         yield Proxy(**row)
Ejemplo n.º 10
0
	def find_all(self):
		"""Return a list with one Proxy per document from ``self.proxies.find()``."""
		proxies = []
		for document in self.proxies.find():
			# Drop the _id key; the rest is unpacked as Proxy keyword arguments.
			del document['_id']
			proxies.append(Proxy(**document))
		return proxies
Ejemplo n.º 11
0
 def get_proxies_from_page(self, page):
     """Parse ``page`` with ``etree.HTML`` and yield a Proxy per matched row.

     Rows are the nodes selected by ``self.group_xpath``; for each row the
     ``ip``, ``port`` and ``area`` expressions from ``self.detail_xpath``
     are evaluated and passed through ``self.get_first_from_list``.
     """
     root = etree.HTML(page)
     for row in root.xpath(self.group_xpath):
         ip, port, area = (
             self.get_first_from_list(row.xpath(self.detail_xpath[field]))
             for field in ('ip', 'port', 'area')
         )
         # NOTE(review): area is the third positional argument here; confirm
         # that this matches Proxy's signature.
         yield Proxy(ip, port, area)
Ejemplo n.º 12
0
 def find_all(self):
     """2.4 Find all proxy IPs and yield them as Proxy objects."""
     def drop_id(entry):
         # The redundant _id field is removed so the entry can be unpacked
         # into a Proxy.
         entry.pop('_id')
         return entry

     # self.proxies.find() supplies the dict for every stored IP.
     yield from (Proxy(**drop_id(entry)) for entry in self.proxies.find())
Ejemplo n.º 13
0
 def find_all(self):
     """
     Query all proxy IPs.

     :return: generator yielding one Proxy per item of ``self.proxies.find()``
     """
     for data in self.proxies.find():
         data.pop('_id')  # _id is not forwarded to Proxy
         yield Proxy(**data)
Ejemplo n.º 14
0
    def find(self, conditions=None, count=0):
        """Return Proxy objects matching ``conditions``, best-scored first.

        :param conditions: filter passed to ``self.proxies.find``; ``None``
            (the default) is treated as an empty filter ``{}``
        :param count: forwarded as the ``limit`` argument of the query
        :return: list of Proxy objects ordered by ``score`` descending, then
            ``speed`` ascending
        """
        # Create a fresh dict per call rather than sharing one mutable
        # default object between all calls.
        if conditions is None:
            conditions = {}
        cursor = self.proxies.find(conditions, limit=count).sort([
            ('score', pymongo.DESCENDING), ('speed', pymongo.ASCENDING)
        ])
        proxy_list = []

        for item in cursor:
            # _id is dropped; the remaining keys are Proxy keyword arguments.
            item.pop('_id')
            proxy_list.append(Proxy(**item))
        return proxy_list
Ejemplo n.º 15
0
 def parse_proxies_from_page(self, page):
     """Parse the page, extract the data and wrap it in Proxy objects."""
     rows = etree.HTML(page).xpath(self.group_xpath)
     for row in rows:
         ip_nodes = row.xpath(self.detail_xpath['ip'])
         ip = self.get_first_from_page(ip_nodes)
         port_nodes = row.xpath(self.detail_xpath['port'])
         port = self.get_first_from_page(port_nodes)
         area_nodes = row.xpath(self.detail_xpath['area'])
         area = self.get_first_from_page(area_nodes)
         yield Proxy(ip, port, area=area)
Ejemplo n.º 16
0
 def get_proxies_from_page(self, page):
     """Parse ``page`` as HTML and yield one Proxy per matched row.

     Rows are the nodes selected by ``self.group_xpath``. For each row the
     ``ip``, ``port`` and ``area`` expressions in ``self.detail_xpath`` are
     evaluated once; the first match is used, or ``None`` when the
     expression matches nothing.

     :param page: HTML text handed to ``etree.HTML``
     :return: generator of Proxy objects built with ``ip``, ``port`` and
         ``area`` keyword arguments
     """
     html = etree.HTML(page)
     for tr in html.xpath(self.group_xpath):
         fields = {}
         for key in ('ip', 'port', 'area'):
             # Evaluate each XPath a single time and reuse the result for
             # both the emptiness check and the element access.
             matches = tr.xpath(self.detail_xpath[key])
             fields[key] = matches[0] if matches else None
         yield Proxy(**fields)
Ejemplo n.º 17
0
    def find_all(self):
        """Query all proxy IPs in the database, yielding Proxy objects."""
        for document in self.proxies.find():
            # Delete the _id key/value pair, then hand the rest to Proxy.
            del document['_id']
            # Generator: each Proxy is produced on demand.
            yield Proxy(**document)
Ejemplo n.º 18
0
 def find_all(self):
     """
     Query all IP proxies.

     :return: generator yielding one Proxy per item of ``self.proxies.find()``
     """
     items = self.proxies.find()
     for fields in items:
         fields.pop('_id')  # everything except _id is passed to Proxy
         yield Proxy(**fields)
Ejemplo n.º 19
0
 def get_proxies_from_page(self, page):
     """Parse the page, extract the data and wrap it in Proxy objects."""
     # Nodes that each contain the information for one proxy IP.
     rows = etree.HTML(page).xpath(self.group_xpath)
     for row in rows:
         # Collect ip, port and area (in that order) for this row.
         values = [
             self.get_frist_from_list(row.xpath(self.detail_xpath[key]))
             for key in ('ip', 'port', 'area')
         ]
         # Hand the extracted data back lazily.
         yield Proxy(values[0], values[1], area=values[2])
Ejemplo n.º 20
0
    def limit_find(self, conditions=None, count=0):
        """Query proxies by condition with an optional result limit.

        Results are ordered by score descending, then speed ascending, so
        that the best proxies come first.

        :param conditions: filter passed to ``self.proxies.find``; ``None``
            (the default) is treated as an empty filter ``{}``
        :param count: forwarded as the ``limit`` argument of the query
        :return: list of Proxy objects
        """
        # Create a fresh dict per call rather than sharing one mutable
        # default object between all calls.
        if conditions is None:
            conditions = {}
        cursor = self.proxies.find(conditions, limit=count).sort([
            ('score', pymongo.DESCENDING), ('speed', pymongo.ASCENDING)])
        # Collects the proxies produced by the query.
        proxy_list = []

        for item in cursor:
            # _id is dropped; the remaining keys are Proxy keyword arguments.
            item.pop('_id')
            proxy_list.append(Proxy(**item))
        return proxy_list
Ejemplo n.º 21
0
 def get_proxies_from_page(self, page):
     """Parse the page and yield a Proxy for each matched row."""
     document = etree.HTML(page)
     # Walk the nodes that contain proxy IP information.
     for node in document.xpath(self.group_xpath):
         ip_matches = node.xpath(self.detail_xpath['ip'])
         ip = self.get_first_from_list(ip_matches)
         port_matches = node.xpath(self.detail_xpath['port'])
         port = self.get_first_from_list(port_matches)
         area_matches = node.xpath(self.detail_xpath['area'])
         area = self.get_first_from_list(area_matches)
         yield Proxy(ip, port, area=area)
Ejemplo n.º 22
0
 def get_data(self, data):
     """POST ``data`` to ``self.url`` and yield a Proxy per row of the reply.

     The response body is decoded and parsed as JSON; the HTML fragment
     stored under ``['ret_data']['html']`` is parsed with ``etree.HTML`` and
     the nodes selected by ``self.group_xpath`` are turned into Proxy
     objects.

     :param data: form payload sent as the ``data`` argument of the POST
     :return: generator of Proxy objects
     :raises IndexError: if the ``ip``, ``port`` or ``area`` expression
         matches nothing for a row (the first match is taken unconditionally)
     """
     response = requests.post(self.url,
                              data=data,
                              headers=get_request_header())
     # Use dedicated local names: the builtin ``dict`` is not shadowed and
     # the ``data`` parameter is not rebound to an unrelated value.
     payload = json.loads(response.content.decode())
     fragment = payload['ret_data']['html']
     html = etree.HTML(fragment)
     for tr in html.xpath(self.group_xpath):
         ip = tr.xpath(self.detail_xpath['ip'])[0]
         port = tr.xpath(self.detail_xpath['port'])[0]
         area = tr.xpath(self.detail_xpath['area'])[0]
         yield Proxy(ip, port, area=area)
Ejemplo n.º 23
0
 def get_proxies_from_page(self,page):
     """Parse ``page`` and yield a Proxy for each node matched by ``self.group_xpath``."""
     root = etree.HTML(page)
     # Each matched node holds the information for one proxy IP.
     for row in root.xpath(self.group_xpath):
         ip = row.xpath(self.detail_xpath["ip"])[0]
         port = row.xpath(self.detail_xpath["port"])[0]
         # Some rows have no area, so fall back to an empty string.
         area_matches = row.xpath(self.detail_xpath["area"])
         area = area_matches[0] if area_matches else ""
         yield Proxy(ip, port, area=area)
Ejemplo n.º 24
0
 def find(self, conditions=None, count=0):
     """
     Query the proxy pool, best proxies first.

     :param conditions: the key:value pairs to query for; ``None`` (the
         default) is treated as an empty filter ``{}``
     :param count: number of results to return (forwarded as ``limit``)
     :return: list of Proxy objects that satisfy the conditions
     """
     # Create a fresh dict per call rather than sharing one mutable default
     # object between all calls.
     if conditions is None:
         conditions = {}
     # Order the pool by score descending, then speed ascending.
     cursor = self.proxies.find(conditions, limit=count).sort(
         [('score', pymongo.DESCENDING), ('speed', pymongo.ASCENDING)])
     proxy_list = []
     for item in cursor:
         # _id is dropped; the remaining keys are Proxy keyword arguments.
         item.pop('_id')
         proxy_list.append(Proxy(**item))
     return proxy_list
Ejemplo n.º 25
0
 def get_proxies_from_page(self, page):
     """Parse ``page`` and yield a Proxy per row, splitting "ip:port" text.

     The ``ip_and_port`` expression must match for every row; ``area`` is
     optional and becomes ``None`` when it cannot be extracted.
     """
     rows = etree.HTML(page).xpath(self.group_xpath)
     for row in rows:
         address = str(row.xpath(self.detail_xpath['ip_and_port'])[0])
         parts = address.split(':')
         ip = parts[0]
         port = parts[1]
         try:
             area = row.xpath(self.detail_xpath['area'])[0]
         except Exception:
             # Best-effort: any failure to extract the area leaves it unset.
             area = None
         yield Proxy(ip, port, area=area)
Ejemplo n.º 26
0
 def get_proxies_from_page(self, page):
     """Parse the page, extract the data and wrap it in Proxy objects.

     :param page: HTML text handed to ``etree.HTML``
     :return: generator yielding one Proxy per node matched by
         ``self.group_xpath``
     """
     element = etree.HTML(page)
     # Nodes that each contain the information for one proxy IP.
     trs = element.xpath(self.group_xpath)
     # Extract ip, port and area for every node; each XPath result is
     # reduced through self.get_first_from_list before reaching Proxy.
     for tr in trs:
         ip = self.get_first_from_list(tr.xpath(self.detail_xpath['ip']))
         port = self.get_first_from_list(tr.xpath(
             self.detail_xpath['port']))
         area = self.get_first_from_list(tr.xpath(
             self.detail_xpath['area']))
         yield Proxy(ip, port, area=area)
Ejemplo n.º 27
0
 def find(self, conditions=None, count=0):
     """
     Query proxies by condition.

     :param conditions: query condition dict; ``None`` (the default) is
         treated as an empty filter ``{}``
     :param count: maximum number of IPs to fetch (forwarded as ``limit``)
     :return: list of Proxy objects that satisfy the conditions, ordered by
         ``score`` descending and then ``speed`` ascending
     """
     # Create a fresh dict per call rather than sharing one mutable default
     # object between all calls.
     if conditions is None:
         conditions = {}
     cursor = self.proxies.find(conditions, limit=count).sort(
         [('score', pymongo.DESCENDING), ('speed', pymongo.ASCENDING)])
     # List that collects the resulting Proxy objects.
     proxy_list = []
     for item in cursor:
         # _id is dropped; the remaining keys are Proxy keyword arguments.
         item.pop('_id')
         proxy_list.append(Proxy(**item))
     return proxy_list
Ejemplo n.º 28
0
 def get_proxies_from_page(self, page):
     """Parse the page, extract the data and wrap it in Proxy objects.

     :param page: HTML text handed to ``etree.HTML``
     :return: generator yielding one Proxy per node matched by
         ``self.group_xpath``
     :raises IndexError: if the ``ip``, ``port`` or ``area`` expression
         matches nothing for a row (the first match is taken unconditionally)
     """
     element = etree.HTML(page)
     # Nodes that each contain the information for one proxy IP.
     trs = element.xpath(self.group_xpath)
     for tr in trs:
         # Each XPath evaluates to a list; [0] takes its first value.
         ip = tr.xpath(self.detail_xpath['ip'])[0]
         port = tr.xpath(self.detail_xpath['port'])[0]
         area = tr.xpath(self.detail_xpath['area'])[0]
         # Hand the extracted data back lazily.
         yield Proxy(ip, port, area=area)
Ejemplo n.º 29
0
 def find(self, conditions=None, count=0):
     """
     Query proxies by the given conditions.

     :param conditions: query condition dict; ``None`` (the default) is
         treated as an empty filter ``{}``
     :param count: number of results to query (forwarded as ``limit``)
     :return: list of Proxy objects ordered by ``score`` descending, then
         ``idle`` ascending
     """
     # Create a fresh dict per call rather than sharing one mutable default
     # object between all calls.
     if conditions is None:
         conditions = {}
     cursor = self.collection.find(conditions, limit=count).sort([
         ('score', pymongo.DESCENDING), ('idle', pymongo.ASCENDING)
     ])
     proxy_list = []
     for item in cursor:
         # _id is dropped; the remaining keys are Proxy keyword arguments.
         item.pop('_id')
         proxy_list.append(Proxy(**item))
     return proxy_list
Ejemplo n.º 30
0
 def get_proxies_from_page(self, page):
     """Parse ``page`` and yield a Proxy with stripped fields for each row.

     ``ip`` and ``port`` must match for every row; ``area`` is optional and
     becomes ``None`` when it cannot be extracted.
     """
     # Nodes that each contain one proxy's information.
     rows = etree.HTML(page).xpath(self.group_xpath)
     for row in rows:
         ip_text = row.xpath(self.detail_xpath['ip'])[0]
         ip = ip_text.strip()
         port_text = row.xpath(self.detail_xpath['port'])[0]
         port = port_text.strip()
         try:
             area = row.xpath(self.detail_xpath['area'])[0].strip()
         except Exception:
             # Best-effort: any failure to extract the area leaves it unset.
             area = None
         # Generator: proxies are produced one at a time.
         yield Proxy(ip, port, area=area)