Esempio n. 1
0
 def freeProxy1(page_num=20, work_proxy=None) -> List[str]:
     """Collect free proxies from the Qiyun proxy listing (齐云代理).

     Source: https://www.7yip.cn/free/?action=china&page={page}

     Up to two distinct pages are drawn at random from pages
     1..``page_num``. Fewer pages are fetched when ``page_num`` is below 2,
     and none at all when it is zero or negative.

     :param page_num: number of listing pages to sample from.
     :param work_proxy: forwarded to ``make_req`` as its ``proxies`` argument.
     :return: ``'http://ip:port'`` strings that passed
         ``ProxyFactory.proxy_str_check``.
     """
     src_name = '齐云代理'
     print("使用来源1: ", src_name)
     proxies = []
     url_format = 'https://www.7yip.cn/free/?action=china&page={page}'
     # Cap the sample size: random.sample() raises ValueError when asked for
     # more items than the population holds (i.e. page_num < 2).
     pages = range(page_num)
     for i in random.sample(pages, k=min(2, len(pages))):
         url = url_format.format(page=i + 1)
         good, content_type, res = make_req(url, proxies=work_proxy)
         if good and content_type == 'text/html;charset=utf-8':
             # Group 1 is a dotted-quad cell, group 2 the next all-digit
             # cell; together they are joined as "ip:port" below.
             items = re.findall(
                 r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td.*?>(\d+)</td>',
                 res.text)
             cnt = 0
             for item in items:
                 raw_ip = item[0] + ":" + item[1]
                 if ProxyFactory.proxy_str_check(raw_ip):
                     proxies.append('http://' + raw_ip)
                     cnt += 1
             print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, cnt))
         else:
             # The content type is the only failure detail reported.
             why = content_type
             print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                 src_name, url, why))
     return proxies
Esempio n. 2
0
 def freeProxy0(work_proxy=None) -> List[str]:
     """Collect proxies from the jhao104 proxy_pool HTTP endpoint.

     Endpoint: http://118.24.52.95/get_all/
     Project: https://github.com/jhao104/proxy_pool/tree/1a3666283806a22ef287fba1a8efab7b94e94bac

     The original note states the endpoint has a daily quota of 500 requests
     and that a proxy should be used once the quota is exceeded.

     :param work_proxy: forwarded to ``make_req`` as its ``proxies`` argument.
     :return: ``'http://ip:port'`` strings that passed
         ``ProxyFactory.proxy_str_check``.
     """
     src_name = 'IP代理接口jhao104'
     print("使用来源0: ", src_name)
     proxies = []
     url = 'http://118.24.52.95/get_all/'
     good, content_type, res = make_req(url, proxies=work_proxy)
     if good and content_type == 'application/json':
         # NOTE: the former ``encoding='utf-8'`` keyword was dropped.
         # Assuming ``res`` is a requests Response, json() forwards keyword
         # arguments to json.loads(), which rejects ``encoding`` on
         # Python 3.9+ with a TypeError.
         items = res.json()
         cnt = 0
         for item in items:
             raw_ip = item['proxy']
             if ProxyFactory.proxy_str_check(raw_ip):
                 proxies.append('http://' + raw_ip)
                 cnt += 1
         print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, cnt))
     else:
         # The content type is the only failure detail reported.
         why = content_type
         print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(src_name, url, why))
     return proxies
Esempio n. 3
0
 def freeProxy7(page_num=100, work_proxy=None) -> List[str]:
     """Collect free proxies from the Xila proxy listing (西拉代理).

     Source: http://www.xiladaili.com/gaoni/{page}/

     Up to two distinct pages are drawn at random from pages
     1..``page_num``. Fewer pages are fetched when ``page_num`` is below 2,
     and none at all when it is zero or negative.

     :param page_num: number of listing pages to sample from.
     :param work_proxy: forwarded to ``make_req`` as its ``proxies`` argument.
     :return: ``'http://ip:port'`` strings that passed
         ``ProxyFactory.proxy_str_check``.
     """
     src_name = '西拉代理'
     print("使用来源7: ", src_name)
     proxies = []
     url_format = 'http://www.xiladaili.com/gaoni/{page}/'
     # Cap the sample size: random.sample() raises ValueError when asked for
     # more items than the population holds (i.e. page_num < 2).
     pages = range(page_num)
     for i in random.sample(pages, k=min(2, len(pages))):
         url = url_format.format(page=i + 1)
         good, content_type, res = make_req(url,
                                            proxies=work_proxy,
                                            timeout=15)
         if good and 'text/html' in content_type:
             # Matches a single cell of the form <td>ip:port</td>.
             items = re.findall(
                 r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)</td>',
                 res.text)
             cnt = 0
             for item in items:
                 raw_ip = item[0] + ":" + item[1]
                 if ProxyFactory.proxy_str_check(raw_ip):
                     proxies.append('http://' + raw_ip)
                     cnt += 1
             print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, cnt))
         else:
             # The content type is the only failure detail reported.
             why = content_type
             print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                 src_name, url, why))
     return proxies
Esempio n. 4
0
def test_IP_by_request(proxy: str, url: str, timeout: int) -> tuple:
    """Delegate to ``make_req`` and hand its result back unchanged.

    :param proxy: passed to ``make_req`` as its second positional argument.
    :param url: passed to ``make_req`` as its first positional argument.
    :param timeout: passed to ``make_req`` as its third positional argument.
    :return: whatever ``make_req`` returns; described by the original
        docstring as ``(isGood, content_type, result: Response)``.
    """
    outcome = make_req(url, proxy, timeout)
    return outcome
Esempio n. 5
0
    def freeProxy3(page_num=20, work_proxy=None) -> List[str]:
        """Collect free proxies from the jiangxianli proxy API (免费代理库jiangxianli).

        Source: https://ip.jiangxianli.com/api/proxy_ips?page={page}

        Up to two distinct pages are drawn at random from pages
        1..``page_num``. Fewer pages are fetched when ``page_num`` is below
        2, and none at all when it is zero or negative.

        :param page_num: number of API pages to sample from.
        :param work_proxy: forwarded to ``make_req`` as its ``proxies`` argument.
        :return: ``'http://ip:port'`` strings that passed
            ``ProxyFactory.proxy_str_check``.
        """
        src_name = '免费代理库jiangxianli'
        print("使用来源3: ", src_name)
        proxies = []
        url_format = "https://ip.jiangxianli.com/api/proxy_ips?page={page}"
        # Cap the sample size: random.sample() raises ValueError when asked
        # for more items than the population holds (i.e. page_num < 2).
        pages = range(page_num)
        for i in random.sample(pages, k=min(2, len(pages))):
            url = url_format.format(page=i + 1)
            good, content_type, res = make_req(url, proxies=work_proxy)
            print(url, good, content_type)
            if good and content_type == 'application/json':
                # NOTE: the former ``encoding='utf-8'`` keyword was dropped.
                # Assuming ``res`` is a requests Response, json() forwards
                # keyword arguments to json.loads(), which rejects
                # ``encoding`` on Python 3.9+ with a TypeError.
                result_data = res.json()
                # The entries live under data -> data; ``or`` also covers a
                # key that is present but null.
                items = (result_data.get('data') or {}).get('data') or []
                if items:
                    cnt = 0
                    for item in items:
                        # format() tolerates a numeric port, unlike ``+``.
                        raw_ip = '{}:{}'.format(item['ip'], item['port'])
                        if ProxyFactory.proxy_str_check(raw_ip):
                            proxies.append('http://' + raw_ip)
                            cnt += 1
                    print("---->通过来源:{0} {1} 获得{2}个IP".format(
                        src_name, url, cnt))
                else:
                    print("---->通过来源:{0} {1} 获得0个IP".format(src_name, url))

            else:
                # The content type is the only failure detail reported.
                why = content_type
                print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                    src_name, url, why))
        return proxies
Esempio n. 6
0
 def freeProxy2(page_num=20, work_proxy=None) -> List[str]:
     """Collect free proxies from the ip3366 "cloud proxy" listing (云代理).

     Source: http://www.ip3366.net/free/ (the ``stype=1`` and ``stype=2``
     listings). The original note warns to mind the request frequency.

     For each listing, up to two distinct pages are drawn at random from
     pages 1..``page_num``. Fewer pages are fetched when ``page_num`` is
     below 2, and none at all when it is zero or negative.

     :param page_num: number of pages per listing to sample from.
     :param work_proxy: forwarded to ``make_req`` as its ``proxies`` argument.
     :return: ``'http://ip:port'`` strings that passed
         ``ProxyFactory.proxy_str_check``.
     """
     src_name = '云代理'
     print("使用来源2: ", src_name)
     proxies = []
     url_formats = [
         'http://www.ip3366.net/free/?stype=1&page={page}',
         "http://www.ip3366.net/free/?stype=2&page={page}"
     ]
     # Cap the sample size: random.sample() raises ValueError when asked for
     # more items than the population holds (i.e. page_num < 2).
     pages = range(page_num)
     sample_size = min(2, len(pages))
     for url_format in url_formats:
         for i in random.sample(pages, k=sample_size):
             url = url_format.format(page=i + 1)
             good, content_type, res = make_req(url, proxies=work_proxy)
             if good and content_type == 'text/html':
                 # Group 1 is a dotted-quad cell, group 2 the next all-digit
                 # cell; together they are joined as "ip:port" below.
                 items = re.findall(
                     r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>',
                     res.text)
                 cnt = 0
                 for item in items:
                     raw_ip = item[0] + ":" + item[1]
                     if ProxyFactory.proxy_str_check(raw_ip):
                         proxies.append('http://' + raw_ip)
                         cnt += 1
                 print("---->通过来源:{0} {1} 获得{2}个IP".format(
                     src_name, url, cnt))
             else:
                 # The content type is the only failure detail reported.
                 why = content_type
                 print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                     src_name, url, why))
     return proxies