def __init__(self): """秘密代理的IP抓取""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = [ "http://www.mimiip.com/gngao/{}", # 高匿代理IP "http://www.mimiip.com/gnpu/{}", # 普匿代理IP "http://www.mimiip.com/gntou/{}", # 透明代理IP "http://www.mimiip.com/hw/{}" # 国外代理IP ]
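# A minimal sketch of how a crawl loop over the paged URL templates above might look.
# GETTER, ConnMysql, and BloomFilter are project classes whose exact APIs are not shown
# here; get(), parse(), is_contains(), and insert() below are assumed names used purely
# for illustration, and the page range is arbitrary.
def crawl_pages(self, pages=5):
    for tpl in self.url:                                   # one template per proxy category
        for page in range(1, pages + 1):
            html = self.getter.get(tpl.format(page))       # hypothetical fetch helper
            if not html:
                continue
            for ip, port in self.parse(html):              # parse() assumed to yield (ip, port)
                key = '{}:{}'.format(ip, port)
                if self.bf.is_contains(key):               # skip proxies already seen
                    continue
                self.bf.insert(key)
                self.cm.exe("insert into ipipip (`ip`, `port`) values ('{}', '{}');".format(ip, port))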
def __init__(self): """ 蜻蜓代理的爬虫 https://proxy.horocn.com/free-proxy.html?page={} """ self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.img = ImagePort() self.port = "12345" self.url = "https://proxy.horocn.com/free-proxy.html?page={}"
def __init__(self): """360代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = [ "http://www.swei360.com/free/?page={}", # 国内高匿代理 "http://www.swei360.com/free/?stype=2&page={}", # 国内普通代理 "http://www.swei360.com/free/?stype=3&page={}", # 国外高匿代理 "http://www.swei360.com/free/?stype=4&page={}" # 国外普通代理 ]
def __init__(self):
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    self.bf = BloomFilter(key='allip')
    self.url_list = [
        'https://www.rmccurdy.com/scripts/proxy/output/http/ALL',
        'https://www.rmccurdy.com/scripts/proxy/output/socks/ALL',
        'https://www.rmccurdy.com/scripts/proxy/proxylist.txt',
        'http://www.proxylists.net/http_highanon.txt',
        'http://ab57.ru/downloads/proxyold.txt'
    ]
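# The sources in url_list appear to serve plain-text lists with one proxy per line
# (ip:port) rather than HTML tables. A minimal parsing sketch, assuming
# self.getter.get() returns the response body as a string and that the BloomFilter
# exposes is_contains()/insert(); both are assumptions for illustration.
def crawl_text_lists(self):
    for url in self.url_list:
        body = self.getter.get(url)            # hypothetical fetch helper
        if not body:
            continue
        for line in body.splitlines():
            line = line.strip()
            if ':' not in line:
                continue                       # skip comments and blank lines
            ip, _, port = line.partition(':')
            key = '{}:{}'.format(ip, port)
            if not self.bf.is_contains(key):   # assumed BloomFilter API
                self.bf.insert(key)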
def __init__(self): """码农很忙代理爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = [ 'https://proxy.coderbusy.com/', # 首页 'https://proxy.coderbusy.com/classical/https-ready.aspx?page={}', # HTTPS代理 'https://proxy.coderbusy.com/classical/post-ready.aspx?page={}', # 支持POST的代理 'https://proxy.coderbusy.com/classical/anonymous-type/transparent.aspx?page={}', # 透明代理 'https://proxy.coderbusy.com/classical/anonymous-type/anonymous.aspx?page={}', # 匿名代理 'https://proxy.coderbusy.com/classical/anonymous-type/highanonymous.aspx?page={}', # 高匿代理 ]
from Tools.CheckIP import CheckIP
from Tools.DataBase.db import ConnMysql
import json

ci = CheckIP()
cm = ConnMysql()
IP_LIST = []

sql = """select `type`, `ip`, `port` from ipipip where nmd='High Anonymity' and score='100';"""
cm.exe(sql)

for data in cm.cursor:
    proxies = ci.mk_proxies(data[0], data[1], data[2])
    print(proxies)
    IP_LIST.append(proxies)

print(IP_LIST)

with open('ip_pool.py', 'w') as f:
    f.write('IP_LIST = ' + json.dumps(IP_LIST))
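# A hedged usage sketch for the generated ip_pool.py. It assumes the dicts produced
# by CheckIP.mk_proxies() are in the requests-style {'http': ..., 'https': ...} format;
# the target URL is only an example.
import random

import requests

from ip_pool import IP_LIST

proxies = random.choice(IP_LIST)                # pick one proxy dict from the exported pool
resp = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=10)
print(resp.text)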
def __init__(self): """data5u代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.data5u.com/free/index.shtml"
def __init__(self): """全网代理IP的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.goubanjia.com/"
def __init__(self): self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-{}"
def __init__(self):
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    self.bf = BloomFilter(key='allip')
    self.url = 'http://www.ip181.com/'
def __init__(self): """快代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip')
def __init__(self): """3464网站的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.3464.com/data/Proxy/http/"
def __init__(self): self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.youdaili.net/Daili/http/"
def __init__(self): """3366代理网站的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.ip3366.net/?stype={}&page={}"
def __init__(self): """cool-proxy.net代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "https://www.cool-proxy.net/proxies/http_proxy_list/sort:score/direction:desc/page:{}"
def __init__(self): """ProxyDB代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://proxydb.net/?offset={}"
def __init__(self): """66ip代理的爬虫""" self.getter = GETTER(rtimes=10) self.cm = ConnMysql() self.bf = BloomFilter(key='allip') self.url = "http://www.66ip.cn/{}.html"