class Getter():
    """Fill the proxy pool by running every registered crawl function."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Judge whether the proxy-count limit of the pool is reached."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Invoke each crawl function in turn, storing every proxy it yields.

        Stops early once the pool reaches POOL_UPPER_THRESHOLD proxies.
        """
        crawlerCount = self.crawler.__CrawlFuncCount__
        print('crawlerCount: ', crawlerCount)
        for index in range(crawlerCount):
            if self.is_over_threshold():
                print('The proxies in the pool is too many!')
                break
            crawl_name = self.crawler.__CrawlFunc__[index]
            for proxy in self.crawler.get_proxies(crawl_name):
                self.redis.add(proxy)
def tester_run():
    """Split all stored proxies into batches and test them on a thread pool."""
    print(' Tester is running...')
    redis = RedisClient()
    total = redis.count()
    batches = []
    start = 0
    # Slice the pool into BATCH_TEST_SIZE-sized batches; the end index is
    # inclusive, hence the -1.
    while start < total:
        end = min(start + BATCH_TEST_SIZE, total) - 1
        batches.append(redis.batch(start, end))
        start += BATCH_TEST_SIZE
    pool = ThreadPool()
    pool.map(test_batch_proxies, batches)
    pool.close()
    pool.join()
async def test_single_proxy(proxy):
    """Fetch TEST_URL through *proxy* and adjust the proxy's score in Redis.

    A response status contained in VALID_STATUS_CODE sets the proxy to the
    maximum score; any other status or a request failure decreases it.

    Args:
        proxy: proxy address as str or UTF-8 bytes, e.g. b'1.2.3.4:8080'.

    Returns:
        None
    """
    import asyncio  # function-scope import: keeps the block self-contained

    redis = RedisClient()
    # verify_ssl=False: public proxies frequently present bad certificates.
    conn = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=conn) as session:
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            real_proxy = 'http://' + proxy
            print('Testing ', proxy)
            async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                if response.status in VALID_STATUS_CODE:
                    redis.max(proxy)
                    print('Proxy ', proxy, ' is valid, set max score.')
                else:
                    redis.decrease(proxy)
                    print('Response code is not valid. Proxy ', proxy, ' is invalid, decrease score.')
        except (aiohttp.ClientError, asyncio.TimeoutError, AttributeError, UnicodeDecodeError):
            # Narrowed from a bare `except:` so CancelledError/KeyboardInterrupt
            # still propagate; any request-level failure lowers the score.
            redis.decrease(proxy)
            print('Proxy response failed. Proxy ', proxy, ' is invalid, decrease score.')
def get_conn():
    """Return the context-local Redis client, creating it on first access."""
    try:
        return g.redis
    except AttributeError:
        # First call in this context: create and cache the client on `g`.
        g.redis = RedisClient()
        return g.redis
class Tester(object):
    """Test all proxies stored in Redis in batches and re-score them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy against TEST_URL and adjust its score.
        :param proxy: proxy address as str or UTF-8 bytes
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)  # avoid certificate errors from proxies
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # allow_redirects=False: a redirect is treated as an invalid response.
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        # Valid response: promote the proxy to the maximum score.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                # Any request failure lowers the proxy's score.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test loop: walk all stored proxies in BATCH_TEST_SIZE batches,
        scheduling one coroutine per proxy on the event loop.
        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # NOTE(review): asyncio.wait raises ValueError if `tasks` is
                # empty (caught by the broad handler below) — confirm
                # redis.batch never returns an empty batch inside this range.
                tasks = [
                    self.test_single_proxy(proxy)
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                # Pause between batches to avoid hammering TEST_URL.
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Random_Proxy():
    """Serve a random proxy drawn from the Redis-backed pool."""

    def __init__(self):
        self.db = RedisClient()

    def get_proxy(self):
        """Return one randomly chosen proxy, decoded to a UTF-8 string."""
        raw = self.db.random()
        return str(raw, encoding='utf-8')
class Getter():
    """Run every registered crawl function once and store the results."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the pool has reached POOL_UPPER_THRESHOLD proxies."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Crawl proxies from every source and add them to the pool.

        The threshold is checked once up front; if the pool is already full
        this run is a no-op.
        """
        print('CrawlFunc开始执行')
        if self.is_over_threshold():
            return
        for label in range(self.crawler.__CrawlFuncCount__):
            crawl_name = self.crawler.__CrawlName__[label]
            # Fetch proxies from this crawl source.
            proxies = self.crawler.get_proxies(crawl_name)
            sys.stdout.flush()
            for proxy in proxies:
                self.redis.add(proxy)
class ValidTester():
    """Base class for per-website cookie validity checking.

    Subclasses implement test() with the website-specific validation logic.
    """

    def __init__(self, website='default'):
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """Validate one account's cookies; must be overridden by a subclass."""
        raise NotImplementedError('test: not implemented!')

    def run(self):
        """Run test() for every (username, cookies) pair stored in Redis."""
        for username, cookies in self.cookies_db.all().items():
            self.test(username, cookies)
class CookiesGenerator():
    """Generate cookies for accounts that do not yet have any stored.

    Reads credentials from the 'accounts' Redis DB and writes the cookies
    produced by new_cookies() (as JSON) to the 'cookies' Redis DB.
    """

    def __init__(self, website='default'):
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def new_cookies(self, username, password):
        """Log in and return cookies; subclasses must override.

        Expected return (from run()'s usage): a dict with 'status'
        (1 on success) and 'content' (the cookies on success, otherwise an
        error description).
        """
        raise NotImplementedError("new_cookies: not implemented!")

    def run(self):
        """Check Redis and generate cookies for every account lacking them."""
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()
        for username in accounts_usernames:
            if username not in cookies_usernames:
                # No cookies stored for this account yet: generate them.
                password = self.accounts_db.get(username)
                # NOTE(review): the original print here was corrupted by a
                # credential-redaction pass (syntax error). Reconstructed to
                # echo only the username — never log passwords.
                shown = username.decode('utf-8') if isinstance(username, bytes) else username
                print('Generating Cookies', 'username:', shown)
                result = self.new_cookies(username, password)
                if result.get('status') == 1:
                    # Login succeeded: persist the cookies as JSON.
                    if self.cookies_db.set(username, json.dumps(result.get('content'))):
                        print('Save cookies successfully!')
                else:
                    # Login failed: report the error and drop the account.
                    print(result.get('content'))
                    if self.accounts_db.delete(username):
                        print('Account deleted successfully!')
def __init__(self):
    """Create the Redis client used by this object's methods."""
    self.redis = RedisClient()
def __init__(self):
    """Create the Redis client used by this object's methods."""
    self.db = RedisClient()
def __init__(self):
    """Create the Redis client and the proxy crawler this object drives."""
    self.redis = RedisClient()
    self.crawler = Crawler()
def __init__(self, website='default'):
    """Open the per-website 'cookies' and 'accounts' Redis databases.

    :param website: site key selecting which credential/cookie store to use
    """
    self.website = website
    self.cookies_db = RedisClient('cookies', self.website)
    self.accounts_db = RedisClient('accounts', self.website)
from RedisDB import RedisClient

conn = RedisClient()


# NOTE: the name `set` shadows the builtin set(); kept for compatibility
# with existing callers of this module.
def set(proxy):
    """Store one proxy in Redis and report whether it was newly added."""
    result = conn.add(proxy)
    print(proxy)
    print('录入成功' if result else '录入失败')


def scan():
    """Read proxies from stdin, one per line, until the user types 'exit'."""
    print('请输入代理, 输入exit退出读入')
    while True:
        line = input()
        if line == 'exit':
            break
        set(line)


if __name__ == '__main__':
    scan()
import time
from multiprocessing import Process
from api import app
from generator import GithubCookiesGenerator
from tester import GithubValidTester
from RedisDB import RedisClient

# NOTE(review): `time` and `Process` are imported but unused in this script.

if __name__ == "__main__":
    # Seed test accounts.
    # HACK(review): credentials are hard-coded and stored in Redis in plain
    # text — move them to environment variables or a config file.
    redis = RedisClient('accounts', 'github')
    redis.set('jsrglc', 'liuchennuaa2010')
    redis.set('js', 'liu')
    # Generate cookies for accounts that lack them, then validate them all,
    # then start the API server (blocking).
    print('Cookies 生成进程开始运行 ')
    generator = GithubCookiesGenerator()
    generator.run()
    print('Cookies 生成完成 ')
    print('Cookies 检测进程开始运行 ')
    tester = GithubValidTester()
    tester.run()
    print('Cookies 检测完成 ')
    app.run()