Exemple #1
0
class Getter():
    '''Collects proxies from the crawler and stores them in Redis.'''

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        '''
        Judge whether the number limit of proxies is reached
        '''
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        '''
        Run every registered crawl function in order and add the proxies it
        yields to Redis. The pool size is re-checked before each crawl
        function, and crawling stops as soon as the limit is reached.
        '''
        total = self.crawler.__CrawlFuncCount__
        print('crawlerCount: ', total)
        for index in range(total):
            if self.is_over_threshold():
                print('The proxies in the pool is too many!')
                break
            crawl_func = self.crawler.__CrawlFunc__[index]
            for proxy in self.crawler.get_proxies(crawl_func):
                self.redis.add(proxy)
Exemple #2
0
def tester_run():
    '''
    Split the stored proxies into batches of at most BATCH_TEST_SIZE and
    test the batches on a thread pool.
    '''
    print(' Tester is running...')

    client = RedisClient()
    total = client.count()
    # Each batch is requested as (first, last) where last is one less than the
    # start of the next batch -- presumably an inclusive index range; confirm
    # against RedisClient.batch.
    batches = [
        client.batch(first, min(first + BATCH_TEST_SIZE, total) - 1)
        for first in range(0, total, BATCH_TEST_SIZE)
    ]

    workers = ThreadPool()
    workers.map(test_batch_proxies, batches)
    workers.close()
    workers.join()
Exemple #3
0
async def test_single_proxy(proxy):
    '''Test a single proxy and update its score in Redis.

    A GET request is sent to TEST_URL through the proxy. A response whose
    status is in VALID_STATUS_CODE sets the proxy to the maximum score; any
    other status, or any error raised while testing, decreases its score.

    args:
        proxy: proxy address without scheme (``http://`` is prepended);
               str or UTF-8 encoded bytes
    return:
        None
    '''
    redis = RedisClient()
    conn = aiohttp.TCPConnector(verify_ssl=False)
    async with aiohttp.ClientSession(connector=conn) as session:
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            real_proxy = 'http://' + proxy
            print('Testing ', proxy)
            async with session.get(TEST_URL, proxy=real_proxy,
                                   timeout=15) as response:
                if response.status in VALID_STATUS_CODE:
                    redis.max(proxy)
                    print('Proxy ', proxy, ' is valid, set max score.')
                else:
                    redis.decrease(proxy)
                    print('Response code is not valid. Proxy ', proxy,
                          ' is invalid, decrease score.')
        except Exception:
            # Deliberately not a bare ``except:``: asyncio.CancelledError,
            # KeyboardInterrupt and SystemExit derive from BaseException and
            # must propagate so the coroutine can be cancelled / interrupted.
            redis.decrease(proxy)
            print('Proxy response failed. Proxy ', proxy,
                  ' is invalid, decrease score.')
Exemple #4
0
def get_conn():
    '''
    Return the redis client cached on ``g``, creating it on first use.
    '''
    if hasattr(g, 'redis'):
        return g.redis
    g.redis = RedisClient()
    return g.redis
Exemple #5
0
class Tester(object):
    """Tests the proxies stored in Redis in batches and adjusts their scores."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        A GET request is sent to TEST_URL through the proxy. A status found in
        VALID_STATUS_CODES sets the proxy to the maximum score; any other
        status, or one of the handled request errors, decreases its score.

        :param proxy: proxy address without scheme, str or UTF-8 encoded bytes
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)  # avoid certificate errors
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main entry point of the tester.

        Walks over the stored proxies in slices of BATCH_TEST_SIZE, tests each
        slice concurrently on the event loop and pauses 5 seconds between
        slices. Any exception is reported on stdout instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError for an empty collection;
                # an empty batch should be skipped, not abort the whole run.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Exemple #6
0
class Random_Proxy():
    '''Hands out a random proxy from the Redis-backed store.'''

    def __init__(self):
        self.db = RedisClient()

    def get_proxy(self):
        '''
        Return one random proxy, decoded from UTF-8 into a str
        '''
        raw = self.db.random()
        return str(raw, encoding='utf-8')
Exemple #7
0
class Getter():
    """Collects proxies from the crawler and stores them in Redis."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the pool already holds POOL_UPPER_THRESHOLD or more proxies."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Run every registered crawl function and add the proxies to Redis.

        The pool size is checked once, up front; when the limit is already
        reached nothing is crawled.
        """
        print('CrawlFunc开始执行')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_name = self.crawler.__CrawlName__[index]
            # start crawling proxies
            found = self.crawler.get_proxies(crawl_name)
            sys.stdout.flush()
            for proxy in found:
                self.redis.add(proxy)
Exemple #8
0
class ValidTester():
    """Base class that runs ``test`` on every stored (username, cookies) pair.

    Subclasses must override ``test``.
    """

    def __init__(self, website='default'):
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """Check one account's cookies; not implemented in the base class."""
        raise NotImplementedError('test: not implemented!')

    def run(self):
        """Call ``test`` for each entry returned by ``cookies_db.all()``."""
        for username, cookies in self.cookies_db.all().items():
            self.test(username, cookies)
Exemple #9
0
class CookiesGenerator():
    '''Base class that generates cookies for accounts that have none stored.

    Subclasses must override ``new_cookies``.
    '''

    def __init__(self, website='default'):
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def new_cookies(self, username, password):
        '''Get cookies, subclass needs to be overridden.'''
        raise NotImplementedError("new_cookies: not implemented!")


#    def process_cookies(self, cookies):
#        '''Handling cookies to dict style'''
#        dic = {}
#        for cookie in cookies:
#            dic[cookie['name']] = cookie['value']
#        return dic

    @staticmethod
    def _as_text(value):
        '''Return ``value`` as str, decoding UTF-8 bytes when necessary.'''
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def run(self):
        '''Generate cookies for every account that has no cookies yet.

        For each username present in the accounts store but missing from the
        cookies store, ``new_cookies`` is called. A result with status 1 is
        saved (JSON-encoded ``content``) to the cookies store; any other
        status prints ``content`` and deletes the account.
        '''
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()
        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            # no cookies stored for this account yet, so generate them
            password = self.accounts_db.get(username)
            # The password is intentionally not echoed to stdout.
            print(' Generating Cookies', ' username: ',
                  self._as_text(username))
            result = self.new_cookies(username, password)
            if result.get('status') == 1:  # login succeeded
                if self.cookies_db.set(username,
                                       json.dumps(result.get('content'))):
                    print('Save cookies successfully!')
            else:  # login failed
                print(result.get('content'))
                if self.accounts_db.delete(username):
                    print('Account deleted successfully!')
Exemple #10
0
 def __init__(self):
     """Create the RedisClient used by this object and keep it as ``self.redis``."""
     self.redis = RedisClient()
Exemple #11
0
 def __init__(self):
     """Create the RedisClient used by this object and keep it as ``self.db``."""
     self.db = RedisClient()
Exemple #12
0
 def __init__(self):
     """Create the RedisClient (``self.redis``) and the Crawler (``self.crawler``)."""
     self.redis = RedisClient()
     self.crawler = Crawler()
Exemple #13
0
 def __init__(self, website='default'):
     """Open the 'cookies' and 'accounts' Redis clients for ``website``.

     :param website: second argument passed to both RedisClient instances;
                     defaults to 'default'
     """
     self.website = website
     self.cookies_db = RedisClient('cookies', self.website)
     self.accounts_db = RedisClient('accounts', self.website)
Exemple #14
0
from RedisDB import RedisClient

# Module-level Redis client shared by the helpers below; created at import time.
conn = RedisClient()


def set(proxy):
    """Add ``proxy`` through the shared client and print the outcome.

    NOTE: this name shadows the builtin ``set`` inside this module; it is
    kept because callers use it under this name.
    """
    added = conn.add(proxy)

    print(proxy)
    if added:
        print('录入成功')
    else:
        print('录入失败')


def scan():
    """Read proxies from stdin, one per line, and store each with ``set``.

    Reading stops when the line ``exit`` is entered.
    """
    print('请输入代理, 输入exit退出读入')
    proxy = input()
    while proxy != 'exit':
        set(proxy)
        proxy = input()


# Start the interactive prompt only when this file is executed as a script.
if __name__ == '__main__':
    scan()
Exemple #15
0
import time
from multiprocessing import Process
from api import app
from generator import GithubCookiesGenerator
from tester import GithubValidTester
from RedisDB import RedisClient

# Script entry point: seed accounts, generate cookies, validate them, then
# start the API app. The steps run sequentially in this process.
if __name__ == "__main__":
    # NOTE(review): the two set() calls below store literal strings in the
    # 'accounts'/'github' store -- presumably username -> password pairs.
    # Hard-coded credentials should not live in source; confirm and move them
    # to configuration.
    redis = RedisClient('accounts', 'github')
    redis.set('jsrglc', 'liuchennuaa2010')
    redis.set('js', 'liu')

    # Step 1: run the cookies generator.
    print('Cookies 生成进程开始运行 ')
    generator = GithubCookiesGenerator()
    generator.run()
    print('Cookies 生成完成 ')

    
    # Step 2: run the cookies validity tester.
    print('Cookies 检测进程开始运行 ')
    tester = GithubValidTester()
    tester.run()
    print('Cookies 检测完成 ')

    # Step 3: start the API application.
    app.run()