def freeProxyWallThird():
    """
    proxylistplus fresh HTTP proxy list
    https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1

    Fetches each listing page and pairs every dotted-quad table cell with
    the first all-digit table cell that follows it.

    :return: generator yielding "ip:port" strings
    """
    # An IP cell, then (lazily) anything up to the next purely numeric cell.
    row_pattern = re.compile(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>')
    fetcher = WebRequest()
    for page_url in ('https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1',):
        page = fetcher.get(page_url, timeout=10)
        for ip, port in row_pattern.findall(page.text):
            yield '%s:%s' % (ip, port)
def freeProxyWallFirst():
    """
    cn-proxy, a site hosted outside the firewall
    http://cn-proxy.com/

    :return: generator yielding "ip:port" strings
    """
    # The port cell must follow the IP cell with exactly one character
    # (of any kind) in between.
    row_pattern = re.compile(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]<td>(\d+)</td>')
    fetcher = WebRequest()
    for page_url in ('http://cn-proxy.com/', 'http://cn-proxy.com/archives/218'):
        page = fetcher.get(page_url, timeout=10)
        for ip, port in row_pattern.findall(page.text):
            yield '%s:%s' % (ip, port)
def freeProxyTen():
    """
    Yun proxy (ip3366) http://www.ip3366.net/free/

    :return: generator yielding "ip:port" strings
    """
    # An IP cell, then (lazily) anything up to the next purely numeric cell.
    row_pattern = re.compile(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>')
    fetcher = WebRequest()
    for page_url in ('http://www.ip3366.net/free/',):
        page = fetcher.get(page_url, timeout=10)
        for ip, port in row_pattern.findall(page.text):
            yield '{}:{}'.format(ip, port)
def freeProxyNinth():
    """
    Coderbusy proxy https://proxy.coderbusy.com/ (marked as discontinued)

    Reads the IP from each `data-ip="..."` attribute and the port from the
    first all-digit text that follows it and ends a table cell.

    :return: generator yielding "ip:port" strings
    """
    urls = ['https://proxy.coderbusy.com/classical/country/cn.aspx?page=1']
    # Raw string: `\d` and `\.` are invalid escape sequences in a plain
    # literal and trigger a SyntaxWarning on recent Python versions.
    pattern = re.compile(r'data-ip="(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})".+?>(\d+)</td>')
    request = WebRequest()
    for url in urls:
        r = request.get(url, timeout=10)
        for proxy in pattern.findall(r.text):
            yield ':'.join(proxy)
def freeProxyWallSecond():
    """
    proxy-list.org https://proxy-list.org/english/index.php

    Walks listing pages 1 through 9 and base64-decodes every argument found
    inside a `Proxy('...')` call on the page.

    :return: generator yielding the decoded strings
             (presumably "ip:port" -- confirm against the live page)
    """
    import base64

    token_pattern = re.compile(r"Proxy\('(.*?)'\)")
    fetcher = WebRequest()
    for page_no in range(1, 10):
        page_url = 'https://proxy-list.org/english/index.php?p=%s' % page_no
        page = fetcher.get(page_url, timeout=10)
        for encoded in token_pattern.findall(page.text):
            decoded = base64.b64decode(encoded)
            yield decoded.decode()
def freeProxySixth():
    """
    Xdaili proxy http://www.xdaili.cn/ (marked as discontinued)

    Best-effort source: any failure while fetching or parsing the JSON
    response is logged and the generator simply yields nothing. Rows that
    lack an `ip` or `port` entry are skipped individually instead of ending
    the whole run.

    :return: generator yielding "ip:port" strings
    """
    import logging

    url = 'http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10'
    request = WebRequest()
    try:
        payload = request.get(url, timeout=10).json()
        # Materialise inside the try so a missing or non-iterable `rows`
        # is reported here rather than surfacing in the loop below.
        rows = list(payload['RESULT']['rows'])
    except Exception as exc:
        # Deliberately broad: this source must never break the caller,
        # but the failure should still be visible in the logs.
        logging.getLogger(__name__).warning(
            'freeProxySixth: could not fetch proxies from %s: %s', url, exc)
        return
    for row in rows:
        try:
            proxy = '{}:{}'.format(row['ip'], row['port'])
        except (KeyError, TypeError, IndexError):
            # Malformed row: skip it and keep the remaining ones.
            continue
        yield proxy
def freeProxyEight():
    """
    Mimi proxy http://www.mimiip.com (marked as discontinued)

    Scrapes page 1 of each of the three listing categories.

    :return: generator yielding "ip:port" strings
    """
    category_templates = (
        'http://www.mimiip.com/gngao/%s',  # domestic, high anonymity
        'http://www.mimiip.com/gnpu/%s',  # domestic, normal anonymity
        'http://www.mimiip.com/gntou/%s',  # domestic, transparent
    )
    # range(1, 2) produces only page 1; widen it to crawl more pages.
    page_urls = [template % page_no
                 for template in category_templates
                 for page_no in range(1, 2)]
    row_pattern = re.compile(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W].*<td>(\d+)</td>')
    fetcher = WebRequest()
    for page_url in page_urls:
        page = fetcher.get(page_url, timeout=10)
        for ip, port in row_pattern.findall(page.text):
            yield '%s:%s' % (ip, port)
def freeProxySecond(count=20):
    """
    66ip proxy http://www.66ip.cn/

    :param count: number of proxies to extract; substituted into the
                  `tqsl` / `getnum` query parameter of each endpoint
    :return: generator yielding "ip:port" strings
    """
    url_templates = [
        "http://www.66ip.cn/mo.php?sxb=&tqsl={count}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=",
        "http://www.66ip.cn/nmtq.php?getnum={count}"
        "&isp=0&anonymoustype=0&start=&ports=&export=&ipaddress=&area=1&proxytype=2&api=66ip",
    ]
    proxy_pattern = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}")
    request = WebRequest()
    for template in url_templates:
        url = template.format(count=count)
        html = request.get(url).content
        # A str pattern cannot search a bytes object on Python 3, so decode
        # a raw body first. The matched text is pure ASCII, so undecodable
        # bytes elsewhere in the page can safely be dropped.
        if isinstance(html, bytes):
            html = html.decode('utf-8', 'ignore')
        # The pattern matches only digits, dots and a colon, so the matches
        # need no further stripping.
        for proxy in proxy_pattern.findall(html):
            yield proxy
def freeProxyEleven():
    """
    IP Hai http://www.iphai.com/free/ng

    Scrapes the four free listing pages (ng, np, wg, wp).

    :return: generator yielding "ip:port" strings
    """
    page_urls = (
        'http://www.iphai.com/free/ng',
        'http://www.iphai.com/free/np',
        'http://www.iphai.com/free/wg',
        'http://www.iphai.com/free/wp',
    )
    # Cells may pad their value with whitespace; the captures exclude it.
    row_pattern = re.compile(
        r'<td>\s*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*?</td>[\s\S]*?<td>\s*?(\d+)\s*?</td>')
    fetcher = WebRequest()
    for page_url in page_urls:
        page = fetcher.get(page_url, timeout=10)
        for ip, port in row_pattern.findall(page.text):
            yield '{}:{}'.format(ip, port)