Exemple #1
0
def download(url, timeout=10, user_agent='wswp', num_retries=2):
    """Fetch ``url`` and return the response body as text.

    A proxy is requested from ``RedisOperater().getRandomUsable()``; when one
    is returned the request is routed through it, otherwise the request is
    sent directly.

    Args:
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``requests.get``.
        user_agent: Value sent in the ``User-agent`` header.
        num_retries: How many more attempts are allowed after a 5xx response
            or a read timeout.

    Returns:
        The response text, or ``None`` when the request failed, or when it
        kept returning 5xx / timing out until the retries were used up.
    """
    print('Downloading: {url}'.format(url=url))
    request_kwargs = {
        'headers': {'User-agent': user_agent},
        'timeout': timeout,
    }
    try:
        proxy = RedisOperater().getRandomUsable()
        if proxy:
            request_kwargs['proxies'] = {
                "http": "http://{proxy}".format(proxy=proxy),
                "https": "https://{proxy}".format(proxy=proxy),
            }
        response = requests.get(url, **request_kwargs)
    except requests.ReadTimeout as ex:
        print('Download Timeout: {ex}'.format(ex=ex))
        # Bound the recursion: previously a persistent timeout retried forever.
        if num_retries > 0:
            return download(url, timeout, user_agent, num_retries - 1)
        return None
    except Exception as ex:
        print('Download error: {ex}'.format(ex=ex))
        return None

    if 500 <= response.status_code < 600:
        # Server-side errors may be transient, so retry while attempts remain.
        if num_retries > 0:
            return download(url, timeout, user_agent, num_retries - 1)
        return None
    # Any non-5xx response is returned, even when no retries are left
    # (previously a successful final attempt was thrown away).
    return response.text
Exemple #2
0
class blobStaticProxy():
    """Load proxies from ``blob/blob.list`` and sort them by validity.

    On construction the file is read, every ``ip:port`` occurrence is
    extracted, and each one is checked with ``validUsefulProxy``. Proxies that
    pass are stored in ``vaild_proxies`` and handed to
    ``RedisOperater.addworkin``; the rest go to ``invalid_proxies``.
    """

    def __init__(self):
        self.all_proxies = []
        # NOTE: the misspelt attribute name is kept so existing readers of
        # ``vaild_proxies`` keep working.
        self.vaild_proxies = []
        self.invalid_proxies = []
        self.roper = RedisOperater()
        with open('blob/blob.list', 'r', encoding='utf-8') as f:
            content = f.read()
        self.all_proxies = self._extractProxies(content)
        self._proxyFilter()

    @staticmethod
    def _extractProxies(content):
        """Return every dotted-quad ``ip:port`` substring found in ``content``.

        The dots are escaped so that only literal ``.`` separators match; an
        unescaped ``.`` would accept any character between the digit groups.
        """
        return re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content or '')

    def _proxyFilter(self):
        """Validate each extracted proxy and record it in the matching list."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print the sizes of the three proxy lists and the valid proxies."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
Exemple #3
0
class xiciFreeApi():
    """Fetch a proxy list from ``XICI_API_URL`` and sort it by validity.

    On construction the list is downloaded with ``download2``, split into one
    entry per line, and each entry is checked with ``validUsefulProxy``.
    Proxies that pass are stored in ``vaild_proxies`` and handed to
    ``RedisOperater.addworkin``; the rest go to ``invalid_proxies``. A summary
    is printed at the end.
    """

    def __init__(self):
        self.all_proxies = []
        # NOTE: the misspelt attribute name is kept so existing readers of
        # ``vaild_proxies`` keep working.
        self.vaild_proxies = []
        self.invalid_proxies = []
        self.roper = RedisOperater()
        payload = download2(XICI_API_URL)
        self.all_proxies = self._parseProxyList(payload)
        self._proxyFilter()
        self._getFilterReport()

    @staticmethod
    def _parseProxyList(payload):
        """Split a line-separated proxy listing into its non-blank entries.

        An empty or ``None`` payload yields an empty list. Blank lines (for
        example the one produced by a trailing line break) are dropped so they
        are never validated as if they were proxies.
        """
        if not payload:
            return []
        entries = (line.strip() for line in payload.splitlines())
        return [entry for entry in entries if entry]

    def _proxyFilter(self):
        """Validate each listed proxy and record it in the matching list."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print the sizes of the three proxy lists and the valid proxies."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
Exemple #4
0
class blobFreeApi():
    """Fetch proxies from ``BLOB_API_URL`` and sort them by validity.

    On construction the page is downloaded with ``download``, every
    ``ip:port`` occurrence is extracted, and each one is checked with
    ``validUsefulProxy``. Proxies that pass are stored in ``vaild_proxies``
    and handed to ``RedisOperater.addworkin``; the rest go to
    ``invalid_proxies``.
    """

    def __init__(self):
        self.all_proxies = []
        # NOTE: the misspelt attribute name is kept so existing readers of
        # ``vaild_proxies`` keep working.
        self.vaild_proxies = []
        self.invalid_proxies = []
        self.roper = RedisOperater()
        content = download(BLOB_API_URL)
        self.all_proxies = self._extractProxies(content)
        self._proxyFilter()

    @staticmethod
    def _extractProxies(content):
        """Return every dotted-quad ``ip:port`` substring found in ``content``.

        ``content`` may be ``None`` (a failed download), in which case the
        result is an empty list instead of a ``TypeError``. The dots are
        escaped so that only literal ``.`` separators match.
        """
        return re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content or '')

    def _proxyFilter(self):
        """Validate each extracted proxy and record it in the matching list."""
        for item in self.all_proxies:
            if validUsefulProxy(item):
                self.vaild_proxies.append(item)
                self.roper.addworkin(item)
            else:
                self.invalid_proxies.append(item)

    def _getFilterReport(self):
        """Print the sizes of the three proxy lists and the valid proxies."""
        print('The length of all proxies array is {}'.format(
            len(self.all_proxies)))
        print('And valid proxies array is {}'.format(len(self.vaild_proxies)))
        print(self.vaild_proxies)
        print('And invalid proxies array is {}'.format(
            len(self.invalid_proxies)))
Exemple #5
0
 def __init__(self):
     """Download the proxy page, extract ``ip:port`` entries and filter them."""
     self.all_proxies = []
     self.vaild_proxies = []
     self.invalid_proxies = []
     self.roper = RedisOperater()
     content = download(BLOB_API_URL)
     # Fall back to '' so a failed download (None) gives an empty list rather
     # than a TypeError. The dots are escaped so only literal '.' separators
     # match, not arbitrary characters.
     self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content or '')
     self._proxyFilter()
Exemple #6
0
 def __init__(self):
     """Read ``blob/blob.list``, extract ``ip:port`` entries and filter them."""
     self.all_proxies = []
     self.vaild_proxies = []
     self.invalid_proxies = []
     self.roper = RedisOperater()
     with open('blob/blob.list', 'r', encoding='utf-8') as f:
         content = f.read()
     # The dots are escaped so only literal '.' separators match; an unescaped
     # '.' would accept any character between the digit groups.
     self.all_proxies = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', content)
     self._proxyFilter()
Exemple #7
0
 def __init__(self):
     """Download the proxy listing, filter its entries and print a report."""
     self.all_proxies = []
     self.vaild_proxies = []
     self.invalid_proxies = []
     self.roper = RedisOperater()
     # Named ``payload`` rather than ``list`` so the builtin is not shadowed.
     payload = download2(XICI_API_URL)
     # Tolerate an empty/None payload and drop blank lines (e.g. the one left
     # by a trailing line break) so they are never validated as proxies.
     self.all_proxies = [
         line.strip() for line in (payload or '').splitlines() if line.strip()
     ]
     self._proxyFilter()
     self._getFilterReport()
Exemple #8
0
class kuaiIntrFreeCrawler():
    """Scrape kuaidaili "intr" listing pages 1-10 and cache each proxy found.

    Crawling starts as soon as the object is constructed.
    """

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Walk the listing pages, caching every ``ip:port`` pair.

        A failure on one page is printed and the loop moves on to the next
        page; the 10-second pause only happens after a page that succeeded.
        """
        for page_no in range(1, 11):
            try:
                page_url = 'http://www.kuaidaili.com/free/intr/{page}/'.format(
                    page=page_no)
                tree = lxml.html.fromstring(download2(page_url))
                ip_cells = tree.cssselect('td[data-title="IP"]')
                port_cells = tree.cssselect('td[data-title="PORT"]')
                # IP and PORT cells are paired up by their position.
                for idx, ip_cell in enumerate(ip_cells):
                    proxy = '{ip}:{port}'.format(ip=ip_cell.text,
                                                 port=port_cells[idx].text)
                    print(proxy)
                    self.roper.addcache(proxy)
                time.sleep(10)
            except Exception as ex:
                print(ex)
Exemple #9
0
class xiciNNFreeCrawler():
    """Scrape xicidaili "nn" listing pages 1-10 and cache each proxy found.

    Crawling starts as soon as the object is constructed.
    """

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Walk the listing pages, caching every ``ip:port`` pair.

        All ``td`` cells are treated as rows of ten; within each row the cell
        at offset 1 is read as the IP and the cell at offset 2 as the port. A
        failure on one page is printed and the loop moves on; the 10-second
        pause only happens after a page that succeeded.
        """
        for page_no in range(1, 11):
            try:
                page_url = 'http://www.xicidaili.com/nn/{page}'.format(
                    page=page_no)
                tree = lxml.html.fromstring(download2(page_url))
                cells = tree.cssselect('td')
                row_count = len(cells) // 10
                print(row_count)
                for row in range(row_count):
                    base = row * 10
                    proxy = '{ip}:{port}'.format(ip=cells[base + 1].text,
                                                 port=cells[base + 2].text)
                    print(proxy)
                    self.roper.addcache(proxy)
                time.sleep(10)
            except Exception as ex:
                print(ex)
Exemple #10
0
class xiciStaticProxy():
    """Parse saved ``xici/xici<N>.list`` pages (N = 1..19) and cache proxies.

    Processing starts as soon as the object is constructed.
    """

    def __init__(self):
        self.roper = RedisOperater()
        self._crawler()

    def _crawler(self):
        """Read each saved page and cache every ``ip:port`` pair in it.

        All ``td`` cells are treated as rows of ten; within each row the cell
        at offset 1 is read as the IP and the cell at offset 2 as the port. A
        failure on one file is printed and the loop moves on; the 10-second
        pause only happens after a file that was processed successfully.
        """
        for page_no in range(1, 20):
            try:
                path = 'xici/xici{}.list'.format(page_no)
                with open(path, 'r', encoding='utf-8') as f:
                    markup = f.read()
                cells = lxml.html.fromstring(markup).cssselect('td')
                row_count = len(cells) // 10
                print(row_count)
                for row in range(row_count):
                    base = row * 10
                    proxy = '{ip}:{port}'.format(ip=cells[base + 1].text,
                                                 port=cells[base + 2].text)
                    print(proxy)
                    self.roper.addcache(proxy)
                time.sleep(10)
            except Exception as ex:
                print(ex)
Exemple #11
0
 def __init__(self):
     """Create the Redis helper, then start crawling immediately."""
     store = RedisOperater()
     self.roper = store
     self._crawler()
Exemple #12
0
"""
-------------------------------------------------
    Filename:   server.py
    Author:     Helyao
    Description:
        Serve usable proxies through a Flask API
-------------------------------------------------
    Change Logs:
    2017-06-02 3:00pm   create
-------------------------------------------------
"""
from flask import Flask
from store.operRedis import RedisOperater, UsRedisOperater

# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Store queried by the '/' endpoint.
roper = RedisOperater()
# Store queried by the '/out' endpoint.
roper_us = UsRedisOperater()

@app.route('/')
def index():
    """Return one proxy obtained from ``roper.getRandomUsable()``.

    Responds with an empty body and HTTP 503 when the store yields nothing,
    because Flask raises an error for a view that returns ``None``.
    """
    proxy = roper.getRandomUsable()
    # NOTE(review): assumes getRandomUsable() returns a falsy value when no
    # proxy is available - confirm against the store implementation.
    if not proxy:
        return '', 503
    return proxy

@app.route('/out')
def out():
    """Return one proxy obtained from ``roper_us.getRandomUsable()``.

    Responds with an empty body and HTTP 503 when the store yields nothing,
    because Flask raises an error for a view that returns ``None``.
    """
    proxy = roper_us.getRandomUsable()
    # NOTE(review): assumes getRandomUsable() returns a falsy value when no
    # proxy is available - confirm against the store implementation.
    if not proxy:
        return '', 503
    return proxy

def run(host='0.0.0.0', port=5000):
    """Start the Flask server for ``app``.

    Args:
        host: Interface to bind. The default ``'0.0.0.0'`` listens on all
            interfaces, matching the previous hard-coded behaviour.
        port: TCP port to listen on (default 5000).
    """
    app.run(host=host, port=port)