Exemple #1
0
 def max(self, proxy: Proxy) -> int:
     """
     Mark a proxy as valid by storing it with PROXY_SCORE_MAX.

     :param proxy: proxy whose ``string()`` form is the sorted-set member
     :return: whatever ``self.db.zadd`` returns
     """
     logger.info(f'{proxy.string()} is valid, set to {PROXY_SCORE_MAX}')
     # The two branches differ only in the argument layout passed to zadd:
     # a {member: score} mapping by default, positional (score, member)
     # when IS_REDIS_VERSION_2 is set.
     if not IS_REDIS_VERSION_2:
         mapping = {proxy.string(): PROXY_SCORE_MAX}
         return self.db.zadd(REDIS_KEY, mapping)
     return self.db.zadd(REDIS_KEY, PROXY_SCORE_MAX, proxy.string())
Exemple #2
0
 def add(self, proxy: Proxy, score=PROXY_SCORE_INIT) -> int:
     """
     Store a new proxy with an initial score.

     Nothing is written when the proxy fails validation or is already
     stored; in both cases the method returns None.

     :param proxy: proxy to store, validated as ``host:port`` (like 8.8.8.8:88)
     :param score: score to assign, PROXY_SCORE_INIT by default
     :return: result of ``self.db.zadd``, or None when nothing was added
     """
     # reject addresses that fail the host:port validity check
     address = f'{proxy.host}:{proxy.port}'
     if not is_valid_proxy(address):
         logger.info(f'invalid proxy {proxy}, throw it')
         return
     # a proxy that is already stored keeps its current score
     if self.exists(proxy):
         return
     if not IS_REDIS_VERSION_2:
         return self.db.zadd(REDIS_KEY, {proxy.string(): score})
     return self.db.zadd(REDIS_KEY, score, proxy.string())
Exemple #3
0
 def decrease(self, proxy: Proxy) -> int:
     """
     Decrease the score of a proxy by one and remove the proxy once its
     score is no longer above PROXY_SCORE_MIN.

     :param proxy: proxy whose validation failed
     :return: score after the decrement
     """
     member = proxy.string()
     # ZINCRBY replies with the updated score, so it is used directly
     # instead of issuing a separate zscore read afterwards (which could
     # return None if the member were removed in between).
     if IS_REDIS_VERSION_2:
         score = self.db.zincrby(REDIS_KEY, member, -1)
     else:
         score = self.db.zincrby(REDIS_KEY, -1, member)
     logger.info(f'{member} score decrease 1, current {score}')
     # drop the proxy when its score has fallen to the minimum or below
     if score <= PROXY_SCORE_MIN:
         logger.info(f'{member} current score {score}, remove')
         self.db.zrem(REDIS_KEY, member)
     return score
Exemple #4
0
 def exists(self, proxy: Proxy) -> bool:
     """
     Tell whether a proxy is currently stored.

     :param proxy: proxy to look up
     :return: True when ``zscore`` yields a score for the proxy,
         False when it yields None
     """
     current_score = self.db.zscore(REDIS_KEY, proxy.string())
     return current_score is not None
Exemple #5
0
 def decrease(self, proxy: Proxy) -> int:
     """
     Decrease the score of a proxy by one, or delete the proxy when its
     score is not above PROXY_SCORE_MIN (or it has no score at all).

     :param proxy: proxy
     :return: result of ``zincrby`` when the score was decreased,
         otherwise the result of ``zrem``
     """
     member = proxy.string()
     score = self.db.zscore(REDIS_KEY, member)
     # Compare against None explicitly: a stored score of 0 is a real score
     # and must not be mistaken for "missing" when PROXY_SCORE_MIN is negative.
     if score is not None and score > PROXY_SCORE_MIN:
         logger.info(f'{member} current score {score}, decrease 1')
         if IS_REDIS_VERSION_2:
             return self.db.zincrby(REDIS_KEY, member, -1)
         return self.db.zincrby(REDIS_KEY, -1, member)
     # otherwise delete proxy
     logger.info(f'{member} current score {score}, remove')
     return self.db.zrem(REDIS_KEY, member)
Exemple #6
0
 def parse(self, html):
     """
     Parse a page that lists proxies as ``ip:port`` text following the
     ``<br>`` elements inside ``.cont``.

     :param html: page source
     :return: generator of Proxy
     """
     doc = pq(html)
     for br in doc('.cont br').items():
         # The address is the text that follows the <br>; the tail is None
         # when nothing follows it, which re.search would reject.
         line = br[0].tail
         if not line:
             continue
         match = re.search(r'(\d+\.\d+\.\d+\.\d+):(\d+)', line)
         if match:
             host, port = match.groups()
             # the pattern guarantees digits, so int() cannot fail here;
             # sibling parsers also hand the port over as an int
             yield Proxy(host=host, port=int(port))
Exemple #7
0
 def parse(self, html):
     """
     Extract proxies from the table inside ``.containerbox``.

     Rows are selected with ``tr:gt(0)``, i.e. every row after the first;
     the first cell is read as host and the second as port.

     :param html: page source
     :return: generator of Proxy
     """
     rows = pq(html)('.containerbox table tr:gt(0)')
     for row in rows.items():
         host_cell = row.find('td:nth-child(1)')
         host = host_cell.text()
         port_cell = row.find('td:nth-child(2)')
         port = int(port_cell.text())
         yield Proxy(host=host, port=port)
Exemple #8
0
 def parse(self, html):
     """
     Extract proxies from the ``ul.l2`` entries inside ``.wlist``.

     The first ``span`` of an entry is read as host, the second as port.

     :param html: page source
     :return: generator of Proxy
     """
     entries = pq(html)('.wlist ul.l2')
     for entry in entries.items():
         host_span = entry.find('span:first-child')
         host = host_span.text()
         port_span = entry.find('span:nth-child(2)')
         port = int(port_span.text())
         yield Proxy(host=host, port=port)
Exemple #9
0
 def parse(self, html):
     """
     Extract proxies from table rows whose cells are tagged with
     ``data-title="IP"`` and ``data-title="PORT"``.

     :param html: page source
     :return: generator of Proxy (port is passed on as text)
     """
     doc = pq(html)
     for row in doc('table tr').items():
         host = row.find('td[data-title="IP"]').text()
         port = row.find('td[data-title="PORT"]').text()
         # skip rows where either cell is missing or empty
         if not (host and port):
             continue
         yield Proxy(host=host, port=port)
Exemple #10
0
 def parse(self, html):
     """
     Extract proxies from the first two ``<td>`` cells of each ``<tr>``.

     :param html: page source
     :return: generator of Proxy
     """
     # Raw string so that \s reaches the regex engine instead of being an
     # invalid string escape. \s* matches any whitespace, including the
     # line breaks between the tags.
     row_pattern = re.compile(r'<tr>\s*<td>(.*?)</td>\s*<td>(.*?)</td>')
     for address, port in row_pattern.findall(html):
         yield Proxy(host=address.strip(), port=int(port.strip()))
Exemple #11
0
def convert_proxy_or_proxies(data):
    """
    Convert ``host:port`` text into Proxy objects.

    :param data: a single ``host:port`` string or a list of such strings
    :return: a list of Proxy for list input (invalid items are skipped),
        a single Proxy for a valid string, None for anything else
    """
    if not data:
        return None
    # if list of proxies
    if isinstance(data, list):
        result = []
        for item in data:
            # skip invalid item; non-string entries cannot be stripped or
            # split, so they are treated as invalid too
            if not isinstance(item, str):
                continue
            item = item.strip()
            if not is_valid_proxy(item):
                continue
            host, port = item.split(':')
            result.append(Proxy(host=host, port=int(port)))
        return result
    if isinstance(data, str):
        # strip surrounding whitespace exactly like the list branch does
        text = data.strip()
        if is_valid_proxy(text):
            host, port = text.split(':')
            return Proxy(host=host, port=int(port))
    return None
Exemple #12
0
    def parse(self, html):
        """
        Extract proxies from the text of the first cell of every
        ``tbody`` row.

        Each text node is split at its first ``":"`` into host and port.

        :param html: page source
        :return: generator of Proxy (port is passed on as text)
        """
        tree = etree.HTML(html)
        for cell_text in tree.xpath("//tbody/tr/td[1]/text()"):
            # partition keeps everything after the first colon as the port
            host, _, port = cell_text.partition(":")
            yield Proxy(host=host, port=port)
Exemple #13
0
 def parse(self, html):
     """
     Extract proxies from the ``#ip_list`` rows marked 高匿 (high anonymity).

     :param html: page source
     :return: generator of Proxy
     """
     doc = pq(html)
     for row in doc('#ip_list tr:contains(高匿)').items():
         anonymity = row.find('td.country').text()
         # the row selector matches the text anywhere in the row, so the
         # td.country cell itself is checked for an exact match as well
         if anonymity and anonymity.strip() == '高匿':
             host = row.find('td:nth-child(2)').text()
             port = int(row.find('td:nth-child(3)').text())
             yield Proxy(host=host, port=port)
Exemple #14
0
 def parse(self, html):
     """
     Extract proxies from ``<tr>`` rows whose ``<td>`` cells hold an IP
     address and a port, each surrounded by whitespace.

     :param html: page source
     :return: generator of Proxy
     """
     # Raw strings keep \s, \d and \. as regex escapes rather than invalid
     # string escapes; the patterns are compiled once, outside the loop.
     find_tr = re.compile(r'<tr>(.*?)</tr>', re.S)
     find_ip = re.compile(r'<td>\s+(\d+\.\d+\.\d+\.\d+)\s+</td>', re.S)
     find_port = re.compile(r'<td>\s+(\d+)\s+</td>', re.S)
     # the first matched row is skipped (presumably the table header)
     for row in find_tr.findall(html)[1:]:
         addresses = find_ip.findall(row)
         ports = find_port.findall(row)
         for address, port in zip(addresses, ports):
             yield Proxy(host=address.strip(), port=int(port.strip()))