def download(url, timeout=10, user_agent='wswp', num_retries=2):
    """Fetch *url* and return its body text, routing through a random
    proxy from Redis when one is available.

    Retries up to ``num_retries`` times on 5xx responses and on read
    timeouts; returns None when the retry budget is exhausted or an
    unexpected error occurs.
    """
    print('Downloading: {url}'.format(url=url))
    headers = {'User-agent': user_agent}
    try:
        redis = RedisOperater()
        proxy = redis.getRandomUsable()
        if proxy:
            # NOTE(review): the "https" entry uses an https:// scheme for
            # the proxy address; proxies are normally addressed over plain
            # http even for https targets — confirm against the proxy pool.
            proxies = {
                "http": "http://{proxy}".format(proxy=proxy),
                "https": "https://{proxy}".format(proxy=proxy)
            }
            response = requests.get(url, proxies=proxies, headers=headers,
                                    timeout=timeout)
        else:
            response = requests.get(url, headers=headers, timeout=timeout)
        code = response.status_code
        if 500 <= code < 600:
            # Server-side error: retry while budget remains, else give up.
            # (The original returned None on the success path once the
            # budget reached zero.)
            if num_retries > 0:
                return download(url, timeout, user_agent, num_retries - 1)
            return None
        return response.text
    except requests.ReadTimeout as ex:
        print('Download Timeout: {ex}'.format(ex=ex))
        # Bound timeout retries too: the original recursed here without
        # checking the budget, so a persistently slow host looped forever.
        if num_retries > 0:
            return download(url, timeout, user_agent, num_retries - 1)
        return None
    except Exception as ex:
        print('Download error: {ex}'.format(ex=ex))
        return None
class blobStaticProxy():
    """Load proxy candidates from the local ``blob/blob.list`` file,
    validate each one, and register the usable ones in Redis."""

    def __init__(self):
        self.all_proxies = []
        self.vaild_proxies = []  # sic: attribute name kept for compatibility
        self.invalid_proxies = []
        self.roper = RedisOperater()
        with open('blob/blob.list', 'r', encoding='utf-8') as f:
            content = f.read()
        # Dots escaped: the original r'\d+.\d+.\d+.\d+:\d+' let '.' match
        # any character, accepting malformed candidates.
        self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content)
        self._proxyFilter()

    def _proxyFilter(self):
        """Partition candidates into valid/invalid; push valid ones to Redis."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print a summary of the filtering results."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
class xiciFreeApi():
    """Fetch proxy candidates from the XICI API, validate them, register
    the usable ones in Redis, and print a filter report."""

    def __init__(self):
        self.all_proxies = []
        self.vaild_proxies = []  # sic: attribute name kept for compatibility
        self.invalid_proxies = []
        self.roper = RedisOperater()
        # Renamed from 'list', which shadowed the builtin; guard against a
        # failed download returning None before splitting.
        raw = download2(XICI_API_URL)
        if raw:
            # Drop empty entries produced by a trailing CRLF in the payload.
            self.all_proxies = [p for p in raw.split('\r\n') if p]
        self._proxyFilter()
        self._getFilterReport()

    def _proxyFilter(self):
        """Partition candidates into valid/invalid; push valid ones to Redis."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print a summary of the filtering results."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
class blobFreeApi():
    """Fetch proxy candidates from the BLOB API, validate each one, and
    register the usable ones in Redis."""

    def __init__(self):
        self.all_proxies = []
        self.vaild_proxies = []  # sic: attribute name kept for compatibility
        self.invalid_proxies = []
        self.roper = RedisOperater()
        content = download(BLOB_API_URL)
        # download() returns None on failure; re.findall(None) would raise.
        if content:
            # Dots escaped: the original pattern let '.' match any character.
            self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content)
        self._proxyFilter()

    def _proxyFilter(self):
        """Partition candidates into valid/invalid; push valid ones to Redis."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print a summary of the filtering results."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
def __init__(self):
    """Fetch candidates from the BLOB API, then filter them into Redis."""
    self.all_proxies = []
    self.vaild_proxies = []  # sic: attribute name kept for compatibility
    self.invalid_proxies = []
    self.roper = RedisOperater()
    content = download(BLOB_API_URL)
    # download() returns None on failure; re.findall(None) would raise.
    if content:
        # Dots escaped: the original pattern let '.' match any character.
        self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content)
    self._proxyFilter()
def __init__(self):
    """Load candidates from blob/blob.list, then filter them into Redis."""
    self.all_proxies = []
    self.vaild_proxies = []  # sic: attribute name kept for compatibility
    self.invalid_proxies = []
    self.roper = RedisOperater()
    with open('blob/blob.list', 'r', encoding='utf-8') as f:
        content = f.read()
    # Dots escaped: the original r'\d+.\d+.\d+.\d+:\d+' let '.' match any
    # character, accepting malformed candidates.
    self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content)
    self._proxyFilter()
def __init__(self):
    """Fetch candidates from the XICI API, filter them, print a report."""
    self.all_proxies = []
    self.vaild_proxies = []  # sic: attribute name kept for compatibility
    self.invalid_proxies = []
    self.roper = RedisOperater()
    # Renamed from 'list', which shadowed the builtin; guard against a
    # failed download returning None before splitting.
    raw = download2(XICI_API_URL)
    if raw:
        # Drop empty entries produced by a trailing CRLF in the payload.
        self.all_proxies = [p for p in raw.split('\r\n') if p]
    self._proxyFilter()
    self._getFilterReport()
class kuaiIntrFreeCrawler():
    """Crawl pages 1-10 of kuaidaili's free/intr listing and cache every
    ip:port pair found in Redis."""

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Scrape each page, pairing IP and PORT cells, and cache results."""
        for page in range(1, 11):
            try:
                url = 'http://www.kuaidaili.com/free/intr/{page}/'.format(
                    page=page)
                html = download2(url)
                tree = lxml.html.fromstring(html)
                ips = tree.cssselect('td[data-title="IP"]')
                ports = tree.cssselect('td[data-title="PORT"]')
                # zip replaces the original index loop over range(len(ips)).
                for ip_td, port_td in zip(ips, ports):
                    proxy = '{ip}:{port}'.format(ip=ip_td.text,
                                                 port=port_td.text)
                    print(proxy)
                    self.roper.addcache(proxy)
                # Be polite to the site between page fetches.
                time.sleep(10)
            except Exception as ex:
                print(ex)
class xiciNNFreeCrawler():
    """Crawl pages 1-10 of xicidaili's /nn listing and cache every
    ip:port pair found in Redis."""

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Scrape each page's table cells (10 per row; IP at offset 1,
        port at offset 2) and cache the resulting proxies."""
        for page in range(1, 11):
            try:
                page_url = 'http://www.xicidaili.com/nn/{page}'.format(
                    page=page)
                doc = lxml.html.fromstring(download2(page_url))
                cells = doc.cssselect('td')
                row_count = int(len(cells) / 10)
                print(row_count)
                # Step through the flat cell list one 10-cell row at a time.
                for base in range(0, row_count * 10, 10):
                    proxy = '{ip}:{port}'.format(ip=cells[base + 1].text,
                                                 port=cells[base + 2].text)
                    print(proxy)
                    self.roper.addcache(proxy)
                # Pause between page fetches.
                time.sleep(10)
            except Exception as ex:
                print(ex)
class xiciStaticProxy():
    """Parse locally saved XICI pages (xici/xici1.list .. xici19.list)
    and cache every ip:port pair found in Redis."""

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Read each saved page and cache its proxies; cells come 10 per
        row with the IP at offset 1 and the port at offset 2."""
        for page in range(1, 20):
            try:
                filename = 'xici/xici{}.list'.format(page)
                with open(filename, 'r', encoding='utf-8') as f:
                    html = f.read()
                tree = lxml.html.fromstring(html)
                tds = tree.cssselect('td')
                count = int(len(tds) / 10)
                print(count)
                for line in range(0, count):
                    ip = tds[line * 10 + 1].text
                    port = tds[line * 10 + 2].text
                    proxy = '{ip}:{port}'.format(ip=ip, port=port)
                    print(proxy)
                    self.roper.addcache(proxy)
                # Removed the 10 s time.sleep inherited from the network
                # crawlers: no remote host is contacted here, so rate
                # limiting a local file read only slowed the run down.
            except Exception as ex:
                print(ex)
def __init__(self):
    """Connect to Redis and immediately run the crawl."""
    self.roper = RedisOperater()
    self._crawler()
""" ------------------------------------------------- Filename: server.py Author: Helyao Description: Support usable proxy-service by Flask API ------------------------------------------------- Change Logs: 2017-06-02 3:00pm create ------------------------------------------------- """ from flask import Flask from store.operRedis import RedisOperater, UsRedisOperater app = Flask(__name__) roper = RedisOperater() roper_us = UsRedisOperater() @app.route('/') def index(): proxy = roper.getRandomUsable() return proxy @app.route('/out') def out(): proxy = roper_us.getRandomUsable() return proxy def run(): app.run(host='0.0.0.0', port=5000)