Example #1
File: API.py  Project: yooongchun/Proxy
def do_response():
    # Flask view (the @app.route decorator is not shown in this excerpt).
    # Assumes module-level: from flask import request, a Flask `app`, a
    # RedisClient class, and the HOST/PORT/KEY globals set by API.__set_api below.
    # Note: every call pre-fetches all possible responses before dispatching.
    redis = RedisClient(host=HOST, port=PORT, key=KEY)
    RANDOM = str(redis.random())
    RANDOM_MAX = str(redis.random_max())
    # Join each proxy list into an HTML string, one entry per line.
    ALL = "".join(str(ip) + "<br />" for ip in redis.all())
    ALL_MAX = "".join(str(ip) + "<br />" for ip in redis.all_max())
    COUNT = str(redis.count())
    WRONG = 'Sorry, wrong name or wrong password, try again...'
    if request.method == "GET":
        name = request.args.get("name", "")
        password = request.args.get("password", "")
        method = request.args.get("method", "")
        if not name:
            # no name given: serve the static landing page instead
            return app.send_static_file("index.html")
        if name == "yooongchun" and password == "121561":
            if method == "random":
                return RANDOM
            elif method == "random_max":
                return RANDOM_MAX
            elif method == "all":
                return ALL
            elif method == "count":
                return COUNT
            elif method == "all_max":
                return ALL_MAX
            else:
                return WRONG
        else:
            return WRONG
    if request.method == "POST":
        name = request.form.get('name', '')
        password = request.form.get('password', '')
        method = request.form.get('method', '')
        if name == "yooongchun" and password == "121561":
            if method == "random":
                return RANDOM
            elif method == "random_max":
                return RANDOM_MAX
            elif method == "all":
                return ALL
            elif method == "count":
                return COUNT
            elif method == "all_max":
                return ALL_MAX
            else:
                return WRONG
        else:
            return WRONG
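
A minimal client sketch for this handler. The base URL is an assumption: Example #2 starts the app on moni_port=9999, and the route path is not shown in the excerpt, so "/" is assumed here.

import requests

BASE = "http://localhost:9999/"  # assumed: moni_port=9999, route path not shown

resp = requests.get(BASE, params={
    "name": "yooongchun",    # credentials are hard-coded in do_response()
    "password": "121561",
    "method": "random",      # one of: random, random_max, all, all_max, count
})
print(resp.text)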
Example #2
File: API.py  Project: yooongchun/Proxy
class API(object):
    def __init__(self,
                 host="localhost",
                 port=6379,        # Redis server port
                 key='Proxy',      # Redis key the proxies are stored under
                 moni_port=9999):  # port the HTTP API listens on
        self.__redis = RedisClient(host=host,
                                   port=port,
                                   password=None,
                                   key=key)
        self.__port = moni_port
        self.__host = host
        self.__db_port = port
        self.__key = key

    def __message(self, method):
        # Dispatch a method name to the matching RedisClient call.
        if method == "random":
            return self.__redis.random()
        elif method == "random_max":
            return self.__redis.random_max()
        elif method == "all":
            return self.__redis.all()
        elif method == "all_max":
            return self.__redis.all_max()
        elif method == "count":
            return self.__redis.count()
        else:
            # also returned for an unrecognized method name
            return "Sorry, wrong name or wrong password, try again..."

    def run(self):
        self.__set_api()  # export connection settings before the app starts
        app.run(host="0.0.0.0", port=self.__port)

    def __set_api(self):
        # do_response() reads these module-level globals, so publish them here.
        global HOST
        global PORT
        global KEY
        global MONI_PORT

        HOST = self.__host
        PORT = self.__db_port
        KEY = self.__key
        MONI_PORT = self.__port
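
A minimal sketch of starting the API, assuming a Redis server is reachable with the defaults shown (RedisClient and the Flask app are defined elsewhere in API.py):

if __name__ == "__main__":
    api = API(host="localhost", port=6379, key="Proxy", moni_port=9999)
    api.run()  # exports the globals, then serves do_response() on 0.0.0.0:9999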
Example #3
class Crawler(object):
    '''crawl proxy IPs from free-proxy websites

    Assumes module-level imports (not shown in the excerpt): re, threading,
    requests, random.shuffle, bs4.BeautifulSoup, plus the project's
    RedisClient and UserAgent helpers.
    '''
    def __init__(self, host="localhost", port=6379, key='Proxy'):
        self.__redis = RedisClient(host=host,
                                   port=port,
                                   password=None,
                                   key=key)

    def __select_crawl_func(self):
        '''collect the names of methods that start with "crawl_"'''
        return filter(
            lambda x: x.startswith('crawl_') and callable(getattr(self, x)),
            dir(self))

    def get_proxies(self):
        '''call every "crawl_*" method; each returns a generator of proxies'''
        proxies = []
        funcs = self.__select_crawl_func()
        for func in funcs:
            proxy = getattr(self, func)()  # dispatch by name; safer than eval()
            if proxy:
                proxies.append(proxy)
        return proxies

    def run(self):
        # one thread per source site, each draining its proxy generator
        proxies = self.get_proxies()
        thread_pool = []
        for proxy in proxies:
            th = threading.Thread(target=self.__single_run, args=(proxy, ))
            thread_pool.append(th)
            th.start()
        for th in thread_pool:
            th.join()

    def __single_run(self, proxy):
        '''drain one site's proxy generator and store each entry in Redis'''
        for ip in proxy:
            self.__redis.add(ip)

    def __base_crawl_func(self, page_num, url_base, host, id_anonymous,
                          name_anonymous):
        '''base crawler: fetch each listing page, scan the <td> table cells,
        and yield high-anonymity proxies as "ip:port" strings

        id_anonymous is the column offset (from the IP cell) of the anonymity
        label; name_anonymous is the label text that marks a qualifying row.
        '''
        urls = []
        if page_num > 1:
            for page in range(page_num):
                url = url_base.format(page + 1)
                urls.append(url)
            shuffle(urls)  # randomize the crawl order
        else:
            urls.append(url_base)
        headers = None
        proxy = None
        for page in range(page_num):
            if page % 10 == 0:
                # rotate the User-Agent every 10 pages
                headers = UserAgent(host).headers()
            try:
                if page % 5 == 0:
                    # refresh the outgoing proxy every 5 pages
                    proxy = self.__redis.random_max()
            except Exception:
                proxy = None
            url = urls[page]
            try:
                if proxy:
                    proxies = {"http": "http://" + proxy}
                    response = requests.get(url=url,
                                            headers=headers,
                                            proxies=proxies,
                                            timeout=15)
                else:
                    response = requests.get(url=url,
                                            headers=headers,
                                            timeout=15)
            except Exception:
                # request failed (timeout, dead proxy, ...): skip this page
                continue
            if response.status_code != 200:
                continue
            html = None  # reset so a failed decode cannot reuse the last page
            for code in ['utf-8', 'gbk', 'gb2312']:
                try:
                    html = response.content.decode(code)
                    break
                except Exception:
                    pass
            if not html:
                continue
            soup = BeautifulSoup(html, "lxml")
            tds = soup.find_all("td")
            for index, td in enumerate(tds):
                text = re.sub(r"[\s\n\t]+", "", td.text)
                rule = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"
                if not re.match(rule, text):
                    continue  # not an IP cell
                if index + id_anonymous > len(tds):
                    continue  # truncated row: the anonymity cell is missing
                if name_anonymous not in re.sub(
                        r"[\s\n\t]+", "", tds[index + id_anonymous - 1].text):
                    continue  # not a high-anonymity entry
                IP = re.sub(r"[\s\n\t]+", "", tds[index].text)
                PORT = re.sub(r"[\s\n\t]+", "", tds[index + 1].text)
                # keep a separate name so the request `proxy` above is not clobbered
                candidate = "{}:{}".format(IP, PORT)
                yield candidate

    def crawl_xici(self):
        '''crawl proxy IPs from the xicidaili website'''
        page_num = 3336
        url_base = "http://www.xicidaili.com/nn/{}"
        host = "www.xicidaili.com"
        id_anonymous = 4
        name_anonymous = '高匿名'  # the site's "high anonymity" label

        return self.__base_crawl_func(page_num, url_base, host, id_anonymous,
                                      name_anonymous)

    def crawl_kuaidaili(self):
        '''crawl proxy IPs from the kuaidaili website'''
        page_num = 2367
        url_base = "https://www.kuaidaili.com/free/inha/{}"
        host = "www.kuaidaili.com"
        id_anonymous = 3
        name_anonymous = '高匿名'  # "high anonymity"

        return self.__base_crawl_func(page_num, url_base, host, id_anonymous,
                                      name_anonymous)

    def crawl_66(self):
        '''crawl proxy IPs from the 66ip website'''
        page_num = 1288
        url_base = "http://www.66ip.cn/{}.html"
        host = "www.66ip.cn"
        id_anonymous = 4
        name_anonymous = '高匿代理'  # "high-anonymity proxy"

        return self.__base_crawl_func(page_num, url_base, host, id_anonymous,
                                      name_anonymous)

    def crawl_yqie(self):
        '''crawl proxy IPs from the yqie website (single page)'''
        page_num = 1
        url_base = "http://ip.yqie.com/ipproxy.htm"
        host = "ip.yqie.com"
        id_anonymous = 4
        name_anonymous = '高匿'  # "high anonymity"

        return self.__base_crawl_func(page_num, url_base, host, id_anonymous,
                                      name_anonymous)

    def crawl_yundaili(self):
        '''crawl proxy IPs from the ip3366 (yundaili) website'''
        page_num = 7
        url_base = "http://www.ip3366.net/?stype=1&page={}"
        host = "www.ip3366.net"
        id_anonymous = 3
        name_anonymous = '高匿代理IP'  # "high-anonymity proxy IP"

        return self.__base_crawl_func(page_num, url_base, host, id_anonymous,
                                      name_anonymous)
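
A minimal driver sketch for the crawler (assumes a Redis server on localhost:6379; run() blocks until every source thread finishes):

if __name__ == "__main__":
    crawler = Crawler(host="localhost", port=6379, key="Proxy")
    crawler.run()  # one thread per crawl_* source, storing proxies in Redis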