Esempio n. 1
0
 def max(self, proxy: Proxy) -> int:
     """
     Set the score of a proxy to PROXY_SCORE_MAX.

     :param proxy: proxy whose score is overwritten
     :return: value returned by the underlying ``zadd`` call
     """
     member = proxy.string()
     logger.info(f'{member} is valid, set to {PROXY_SCORE_MAX}')
     # the positional layout of zadd depends on IS_REDIS_VERSION_2
     if IS_REDIS_VERSION_2:
         zadd_args = (PROXY_SCORE_MAX, member)
     else:
         zadd_args = ({member: PROXY_SCORE_MAX},)
     return self.db.zadd(REDIS_KEY, *zadd_args)
Esempio n. 2
0
 def add(self, proxy: Proxy, score=PROXY_SCORE_INIT) -> int:
     """
     Add a proxy to the sorted set with the given score.

     Nothing is written when the proxy fails validation or already exists.

     :param proxy: proxy, ip:port, like 8.8.8.8:88
     :param score: int score
     :return: result of ``zadd``, or None when nothing was added
     """
     address = f'{proxy.host}:{proxy.port}'
     if not is_valid_proxy(address):
         logger.info(f'invalid proxy {proxy}, throw it')
         return None
     if self.exists(proxy):
         return None
     member = proxy.string()
     # the positional layout of zadd depends on IS_REDIS_VERSION_2
     if IS_REDIS_VERSION_2:
         return self.db.zadd(REDIS_KEY, score, member)
     return self.db.zadd(REDIS_KEY, {member: score})
Esempio n. 3
0
 def decrease(self, proxy: Proxy):
     """
     Decrease the score of an invalid proxy by 1 and remove the proxy once
     its score is at or below PROXY_SCORE_MIN.

     :param proxy: proxy
     :return: score read back after the decrement, or None when the member
         is no longer present
     """
     member = proxy.string()
     # the argument order of zincrby depends on IS_REDIS_VERSION_2
     if IS_REDIS_VERSION_2:
         self.db.zincrby(REDIS_KEY, member, -1)
     else:
         self.db.zincrby(REDIS_KEY, -1, member)
     score = self.db.zscore(REDIS_KEY, member)
     logger.info(f'{member} score decrease 1, current {score}')
     # zscore gives None for a missing member (e.g. removed by another
     # client in between); comparing None with an int would raise TypeError
     if score is None:
         return None
     if score <= PROXY_SCORE_MIN:
         logger.info(f'{member} current score {score}, remove')
         self.db.zrem(REDIS_KEY, member)
     return score
Esempio n. 4
0
 def exists(self, proxy: Proxy) -> bool:
     """
     Tell whether the proxy is already stored.

     :param proxy: proxy
     :return: True when ``zscore`` reports a score for the proxy, else False
     """
     score = self.db.zscore(REDIS_KEY, proxy.string())
     return score is not None
Esempio n. 5
0
 def parse(self, html):
     """
     Extract proxies from the text that follows each ``<br>`` inside
     ``.cont``.

     :param html: page markup
     :return: generator of Proxy (host and port are passed as strings)
     """
     doc = pq(html)
     for br in doc('.cont br').items():
         # tail is None when no text follows the <br>; re.search would
         # raise TypeError on it, so such elements are skipped
         line = br[0].tail
         if not line:
             continue
         match = re.search(r'(\d+\.\d+\.\d+\.\d+):(\d+)', line)
         if match:
             host, port = match.groups()
             yield Proxy(host=host, port=port)
Esempio n. 6
0
def convert_proxy_or_proxies(data):
    """
    Convert ``host:port`` text into Proxy objects.

    :param data: a single ``host:port`` string or a list of such strings
    :return: list of Proxy for list input (invalid entries are dropped),
        a single Proxy for a valid string, otherwise None
    """
    if not data:
        return None
    if isinstance(data, list):
        proxies = []
        for raw in data:
            candidate = raw.strip()
            if is_valid_proxy(candidate):
                host, port = candidate.split(':')
                proxies.append(Proxy(host=host, port=int(port)))
        return proxies
    if not isinstance(data, str) or not is_valid_proxy(data):
        return None
    host, port = data.split(':')
    return Proxy(host=host, port=int(port))
Esempio n. 7
0
def convert_proxy_or_proxies(data):
    """
    Convert proxy text, optionally carrying ``username:password@``
    credentials, into Proxy objects.

    :param data: a single proxy string or a list of proxy strings
    :return: list of Proxy for list input (entries that fail validation or
        cannot be parsed are dropped), a single Proxy for a valid string,
        otherwise None
    """
    if not data:
        return None

    # compiled once per call rather than once per list item
    pattern = re.compile(
        r'((?P<username>\S*?)\:(?P<password>\S*?)@)?(?P<ip>[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3})\:(?P<port>\d*)'
    )

    def _to_proxy(text):
        """Build a Proxy from one string; None when ip/port cannot be extracted."""
        match = pattern.search(text)
        # the port group may match an empty string, which int() rejects
        if match is None or not match.group('port'):
            return None
        fields = match.groupdict()
        return Proxy(host=fields['ip'],
                     port=int(fields['port']),
                     username=fields['username'],
                     password=fields['password'])

    # if list of proxies
    if isinstance(data, list):
        result = []
        for item in data:
            item = item.strip()
            # skip invalid item
            if not is_valid_proxy(item):
                continue
            proxy = _to_proxy(item)
            if proxy is not None:
                result.append(proxy)
        return result
    if isinstance(data, str) and is_valid_proxy(data):
        return _to_proxy(data)
    return None
Esempio n. 8
0
 def parse(self, html):
     """
     Yield one Proxy per ``.wlist ul.l2`` entry found in the page.

     :param html: page markup
     :return: generator of Proxy; host comes from the first span, port
         (converted to int) from the second
     """
     for entry in pq(html)('.wlist ul.l2').items():
         host_text = entry.find('span:first-child').text()
         port_text = entry.find('span:nth-child(2)').text()
         yield Proxy(host=host_text, port=int(port_text))
Esempio n. 9
0
 def parse(self, html):
     """
     parse html file to get proxies

     The first two ``<td>`` cells of each matched ``<tr>`` are taken as
     address and port.

     :param html: page markup
     :return: generator of Proxy
     """
     # raw string: '\s' inside a plain literal is an invalid escape sequence
     # \s* also matches the line breaks between the tags
     row_pattern = re.compile(r'<tr>\s*<td>(.*?)</td>\s*<td>(.*?)</td>')
     for address, port in row_pattern.findall(html):
         yield Proxy(host=address.strip(), port=int(port.strip()))
Esempio n. 10
0
 def parse(self, html):
     """
     Yield one Proxy per table row selected by
     ``.containerbox table tr:gt(0)``.

     :param html: page markup
     :return: generator of Proxy; host comes from the first cell, port
         (converted to int) from the second
     """
     rows = pq(html)('.containerbox table tr:gt(0)').items()
     for row in rows:
         host_text = row.find('td:nth-child(1)').text()
         port_text = row.find('td:nth-child(2)').text()
         yield Proxy(host=host_text, port=int(port_text))
Esempio n. 11
0
 def parse(self, html):
     """
     Yield one Proxy per ``#ip_list .tr.ip_tr`` row in the page.

     :param html: page markup
     :return: generator of Proxy; host and port are the text of the first
         and second ``div`` and are passed on as strings
     """
     for row in pq(html)('#ip_list .tr.ip_tr').items():
         host_text = row.find('div:nth-child(1)').text()
         port_text = row.find('div:nth-child(2)').text()
         yield Proxy(host=host_text, port=port_text)
Esempio n. 12
0
    def parse(self, html):
        """
        Yield proxies from the text of the first cell of every table body row.

        :param html: page markup
        :return: generator of Proxy; each cell text is split at its first
            ':' into host and port (both kept as strings)
        """
        tree = etree.HTML(html)
        for cell_text in tree.xpath("//tbody/tr/td[1]/text()"):
            host, _sep, port = cell_text.partition(":")
            yield Proxy(host=host, port=port)
Esempio n. 13
0
 def parse(self, html):
     """
     parse html file to get proxies

     Every run of digits, dots and colons directly before a ``<br>`` is a
     candidate; only candidates with exactly one ':' are yielded.

     :param html: page markup
     :return: generator of Proxy (host and port are passed as strings)
     """
     # raw string: '\d' and '\.' in a plain literal are invalid escapes
     address_pattern = re.compile(r'([\d:\.]*)<br>')
     for addr in address_pattern.findall(html):
         parts = addr.split(':')
         if len(parts) == 2:
             host, port = parts
             yield Proxy(host=host, port=port)
Esempio n. 14
0
    def parse(self, html):
        """
        Parse newline-delimited JSON into proxies.

        Every non-blank line is decoded as a JSON object that must provide
        the keys 'host' and 'port'.

        :param html: response body, one JSON object per line
        :return: generator of Proxy
        """
        for raw_line in html.split('\n'):
            # strip first so whitespace-only lines (e.g. a lone '\r' left by
            # CRLF input) are skipped instead of reaching json.loads
            line = raw_line.strip()
            if not line:
                continue
            record = json.loads(line)
            yield Proxy(host=record['host'], port=record['port'])
Esempio n. 15
0
 def parse(self, html):
     """
     Yield proxies from the ``#ip_list`` rows whose ``td.country`` text is
     exactly '高匿' after stripping.

     :param html: page markup
     :return: generator of Proxy; host comes from the second cell, port
         (converted to int) from the third
     """
     rows = pq(html)('#ip_list tr:contains(高匿)').items()
     for row in rows:
         label = row.find('td.country').text()
         # the selector matches the text anywhere in the row, so the
         # country cell itself is checked again
         if label and label.strip() == '高匿':
             host_text = row.find('td:nth-child(2)').text()
             port_text = row.find('td:nth-child(3)').text()
             yield Proxy(host=host_text, port=int(port_text))
Esempio n. 16
0
 def parse(self, html):
     """
     parse html file to get proxies

     Addresses and ports are collected from whitespace-padded ``<td>``
     cells of every ``<tr>`` except the first one, and paired in order.

     :param html: page markup
     :return: generator of Proxy
     """
     # raw strings avoid invalid escape sequences; the patterns are
     # compiled once instead of once per row
     find_tr = re.compile(r'<tr>(.*?)</tr>', re.S)
     find_ip = re.compile(r'<td>\s+(\d+\.\d+\.\d+\.\d+)\s+</td>', re.S)
     find_port = re.compile(r'<td>\s+(\d+)\s+</td>', re.S)
     # the first matched row is skipped (presumably the table header)
     for row in find_tr.findall(html)[1:]:
         addresses = find_ip.findall(row)
         ports = find_port.findall(row)
         for address, port in zip(addresses, ports):
             yield Proxy(host=address.strip(), port=int(port.strip()))
Esempio n. 17
0
 def parse(self, html):
     """
     parse html file to get proxies

     Each ``.ip`` element spreads the address over several child elements,
     some of them hidden with an inline style containing 'none'. The text
     of the visible children is concatenated; when the result does not hold
     exactly one ':', the text of the last child is taken as the port.

     :param html: page markup
     :return: generator of Proxy (host and port are passed as strings)
     """
     for td in pq(html)('.ip').items():
         children = td.children()
         # keep only the text of children that are not hidden
         ip_str = ''.join(
             child.text or ''
             for child in children
             if 'none' not in child.attrib.get('style', '')
         )
         addr_split = ip_str.split(':')
         if len(addr_split) == 2:
             host, port = addr_split
             yield Proxy(host=host, port=port)
             continue
         # nothing to fall back on without children or port text
         if len(children) == 0:
             continue
         port = children[-1].text
         if not port:
             continue
         # drop the port only where it is the trailing part; a plain
         # replace() would also remove the same digits inside the address
         host = ip_str[:-len(port)] if ip_str.endswith(port) else ip_str
         yield Proxy(host=host, port=port)
Esempio n. 18
0
    def parse(self, html):
        """
        Parse a JSON API response into proxies.

        Nothing is yielded unless the top-level 'code' equals 0. Proxy
        records are read from ``result['data']['data']``; each non-empty
        record must provide the keys 'ip' and 'port'.

        :param html: response body containing a JSON document
        :return: generator of Proxy
        """
        result = json.loads(html)
        if result['code'] != 0:
            return
        # 'last_page' is not read here: its value was only bound to an
        # unused local, so requiring the key could only cause failures
        for record in result['data']['data']:
            if record:
                yield Proxy(host=record['ip'], port=record['port'])
Esempio n. 19
0
def run_tester():
    """
    Run the tester once against a single hard-coded proxy and block until
    that test has finished.
    """
    host = '96.113.165.182'
    port = '3128'
    proxy = Proxy(host=host, port=port)
    # asyncio.wait() rejects bare coroutines on Python 3.11+, so the
    # awaitable is wrapped in a future bound to the tester's loop first
    # (assumes tester.test returns an awaitable - confirm against tester)
    tasks = [asyncio.ensure_future(tester.test(proxy), loop=tester.loop)]
    tester.loop.run_until_complete(asyncio.wait(tasks))