class Getter:
    """Pulls proxies from the crawler and stores them in Redis."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Check whether the proxy pool has reached its size limit.

        :return: True when the stored proxy count is at or above
                 POOL_UPPER_THRESHOLD, otherwise False
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Run every registered crawl function once and store what it yields.

        Nothing is crawled when the pool is already at its limit. The limit is
        only checked once, before the first crawl function runs.
        """
        print('获取器开始执行')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl function.
            found = self.crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for candidate in found:
                self.redis.add(candidate)
class Tester(object):
    """Checks the proxies stored in Redis and adjusts their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        A response whose status is in VALID_STATUS_CODES promotes the proxy via
        ``self.redis.max``; any other status, or a client/timeout error,
        demotes it via ``self.redis.decrease``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if none exists."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # No current loop (e.g. non-main thread, or newer Python versions
            # that no longer create one implicitly).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def run(self):
        """
        Main entry point: test every stored proxy in batches.

        Proxies are read from Redis in slices of BATCH_TEST_SIZE, each slice is
        tested concurrently, and the method sleeps 5 seconds between slices.
        Any exception is reported on stdout instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = self._get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # wrap each check in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                # asyncio.wait() raises ValueError on an empty collection; a
                # batch can be empty if proxies were removed after counting.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @contact: [email protected]
# @software: PyCharm
# @time: 2019/12/26 7:34 PM
# @site: www.gongyanli.com
# @file: importer.py

from ProxyPool.proxypool.db import RedisClient

conn = RedisClient()


# NOTE: this name shadows the builtin ``set`` inside this module; it is kept
# unchanged so existing callers keep working.
def set(proxy):
    """
    Add one proxy through the module-level Redis client and print the outcome.

    :param proxy: proxy string as typed by the user
    :return: None
    """
    result = conn.add(proxy)
    print('录入成功' if result else '录入失败')


def scan():
    """
    Read proxies from stdin, one per line, and store each of them.

    Reading stops when the line ``exit`` is entered or when stdin reaches
    end-of-file (piped input, Ctrl-D).

    :return: None
    """
    print('请输入代理,输入exit退出读取')
    while True:
        try:
            proxy = input()
        except EOFError:
            # stdin was closed without an explicit 'exit'; stop instead of
            # crashing with a traceback.
            break
        if proxy == 'exit':
            break
        set(proxy)


if __name__ == '__main__':
    scan()
def __init__(self):
    """Create a RedisClient and keep it on ``self.redis``."""
    self.redis = RedisClient()
class Tester(object):
    """Checks the proxies stored in Redis and adjusts their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        A response whose status is in VALID_STATUS_CODES promotes the proxy via
        ``self.redis.max``; any other status, or a client/timeout error,
        demotes it via ``self.redis.decrease``.

        :param proxy: proxy address, either ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        # NOTE(review): Host is pinned to www.sogou.com; presumably TEST_URL
        # points at that site - confirm before changing TEST_URL.
        headers = {
            "Connection": "keep-alive",
            "Host": "www.sogou.com",
            "Pragma": "no-cache",
            "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
        }
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试' + proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False,
                                       headers=headers) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用' + proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ' + str(response.status) + 'IP' + proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败{}'.format(proxy))

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if none exists."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # No current loop (e.g. non-main thread, or newer Python versions
            # that no longer create one implicitly).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def run(self):
        """
        Main entry point: test every stored proxy in batches.

        Proxies are read from Redis in slices of BATCH_TEST_SIZE, each slice is
        tested concurrently, and the method sleeps 5 seconds between slices.
        Any exception is reported on stdout instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余{}个代理'.format(count))
            loop = self._get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第{}-{}个代理'.format(start + 1, stop))
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # wrap each check in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                # asyncio.wait() raises ValueError on an empty collection; a
                # batch can be empty if proxies were removed after counting.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误{}'.format(e.args))
def __init__(self):
    """Create the RedisClient and Crawler instances and keep them on ``self``."""
    # Created first; stored as ``self.redis``.
    self.redis = RedisClient()
    # Created second; stored as ``self.crawler``.
    self.crawler = Crawler()
def get_conn():
    """
    Return the RedisClient cached on ``g``, creating it on first use.

    NOTE(review): ``g`` is presumably Flask's application-context global -
    confirm against the file's imports.

    :return: the object stored as ``g.redis``
    """
    if hasattr(g, 'redis'):
        return g.redis
    # First access: build the client and cache it for later calls.
    g.redis = RedisClient()
    return g.redis