def __init__(self):
    """Set up the crawler for the mimiip.com ("Secret Proxy") IP lists.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers, then stores
    the URL template of each listing section in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is a placeholder; presumably filled with the page number.
    section_templates = [
        # High-anonymity proxy IPs
        "http://www.mimiip.com/gngao/{}",
        # Ordinary-anonymity proxy IPs
        "http://www.mimiip.com/gnpu/{}",
        # Transparent proxy IPs
        "http://www.mimiip.com/gntou/{}",
        # Foreign proxy IPs
        "http://www.mimiip.com/hw/{}",
    ]
    self.url = section_templates
def __init__(self):
    """Set up the crawler for the horocn ("Dragonfly Proxy") free list.

    Listing page: https://proxy.horocn.com/free-proxy.html?page={}

    Instantiates the GETTER, ConnMysql, BloomFilter and ImagePort helpers
    and stores a default port string plus the paged listing URL template.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')
    # NOTE(review): ImagePort presumably reads ports rendered as images -- confirm.
    self.img = ImagePort()
    # NOTE(review): "12345" looks like a fallback/placeholder port -- confirm.
    self.port = "12345"

    # "{}" is the value of the ``page`` query parameter.
    listing_template = "https://proxy.horocn.com/free-proxy.html?page={}"
    self.url = listing_template
def __init__(self):
    """Set up the crawler for the swei360.com ("360 Proxy") free lists.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers, then stores
    the URL template of each proxy category in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is the value of the ``page`` query parameter.
    category_templates = [
        # Domestic high-anonymity proxies
        "http://www.swei360.com/free/?page={}",
        # Domestic ordinary proxies
        "http://www.swei360.com/free/?stype=2&page={}",
        # Foreign high-anonymity proxies
        "http://www.swei360.com/free/?stype=3&page={}",
        # Foreign ordinary proxies
        "http://www.swei360.com/free/?stype=4&page={}",
    ]
    self.url = category_templates
def __init__(self):
    """Set up a crawler over a fixed set of proxy-list URLs.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers, then stores
    the source URLs in ``self.url_list``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URLs without page placeholders.
    # NOTE(review): these look like plain-text list endpoints -- confirm.
    source_urls = [
        'https://www.rmccurdy.com/scripts/proxy/output/http/ALL',
        'https://www.rmccurdy.com/scripts/proxy/output/socks/ALL',
        'https://www.rmccurdy.com/scripts/proxy/proxylist.txt',
        'http://www.proxylists.net/http_highanon.txt',
        'http://ab57.ru/downloads/proxyold.txt',
    ]
    self.url_list = source_urls
def __init__(self):
    """Set up the crawler for the proxy.coderbusy.com proxy lists.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers, then stores
    the home page URL and the paged category URL templates in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is the value of the ``page`` query parameter; the first entry
    # (home page) carries no placeholder.
    page_templates = [
        # Home page
        'https://proxy.coderbusy.com/',
        # HTTPS proxies
        'https://proxy.coderbusy.com/classical/https-ready.aspx?page={}',
        # Proxies that support POST
        'https://proxy.coderbusy.com/classical/post-ready.aspx?page={}',
        # Transparent proxies
        'https://proxy.coderbusy.com/classical/anonymous-type/transparent.aspx?page={}',
        # Anonymous proxies
        'https://proxy.coderbusy.com/classical/anonymous-type/anonymous.aspx?page={}',
        # High-anonymity proxies
        'https://proxy.coderbusy.com/classical/anonymous-type/highanonymous.aspx?page={}',
    ]
    self.url = page_templates
def __init__(self):
    """Set up the crawler for the proxydb.net proxy list.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the listing URL template in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is the value of the ``offset`` query parameter.
    listing_template = "http://proxydb.net/?offset={}"
    self.url = listing_template
def __init__(self):
    """Set up the crawler for the cool-proxy.net HTTP proxy list.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the listing URL template in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" follows "page:" in the path; the URL also requests sorting by
    # score in descending direction.
    listing_template = (
        "https://www.cool-proxy.net/proxies/http_proxy_list/sort:score/direction:desc/page:{}"
    )
    self.url = listing_template
def __init__(self):
    """Set up the crawler for the data5u.com free proxy page.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the single page URL in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URL: no page placeholder.
    free_page = "http://www.data5u.com/free/index.shtml"
    self.url = free_page
def __init__(self):
    """Set up the crawler for goubanjia.com ("Whole-Network Proxy IP").

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the site URL in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URL: no page placeholder.
    home_page = "http://www.goubanjia.com/"
    self.url = home_page
def __init__(self):
    """Set up the crawler for the list.proxylistplus.com fresh HTTP list.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the listing URL template in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is the trailing path suffix; presumably the page number.
    listing_template = "https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-{}"
    self.url = listing_template
def __init__(self):
    """Set up the crawler for the Kuaidaili ("Fast Proxy") site.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers. Unlike a
    URL-carrying crawler, no URL attribute is assigned here.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)

    self.cm = ConnMysql()

    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')
def __init__(self):
    """Set up the crawler for the 3464.com HTTP proxy data page.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the page URL in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URL: no page placeholder.
    data_page = "http://www.3464.com/data/Proxy/http/"
    self.url = data_page
def __init__(self):
    """Set up the crawler for the youdaili.net HTTP proxy index.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the index URL in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URL: no page placeholder.
    index_page = "http://www.youdaili.net/Daili/http/"
    self.url = index_page
def __init__(self):
    """Set up the crawler for the ip3366.net proxy site.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the listing URL template in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Two placeholders, in order: the ``stype`` and ``page`` query values.
    listing_template = "http://www.ip3366.net/?stype={}&page={}"
    self.url = listing_template
def __init__(self):
    """Set up the crawler for the ip181.com proxy site.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the site URL in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # Fixed URL: no page placeholder.
    home_page = 'http://www.ip181.com/'
    self.url = home_page
def __init__(self):
    """Set up the crawler for the 66ip.cn proxy site.

    Instantiates the GETTER, ConnMysql and BloomFilter helpers and stores
    the listing URL template in ``self.url``.
    """
    # NOTE(review): rtimes is presumably the retry count -- confirm in GETTER.
    self.getter = GETTER(rtimes=10)
    self.cm = ConnMysql()
    # NOTE(review): key='allip' presumably names the de-dup set -- confirm.
    self.bf = BloomFilter(key='allip')

    # "{}" is the page file name stem; presumably the page number.
    listing_template = "http://www.66ip.cn/{}.html"
    self.url = listing_template