Example #1
0
class ValidateTester():
    """Check a batch of raw proxies concurrently and store the working ones.

    Typical use: call ``set_raw_proxies`` with the ``IP:PORT`` strings to
    check, then call ``test`` to validate the whole batch.
    """

    def __init__(self):
        # List reserved for usable proxies; nothing in this class fills it.
        self.usable_proxies = []
        # Defaults so that ``test`` is a harmless no-op when it is called
        # before ``set_raw_proxies``.
        self._raw_proxies = []
        self._conn = None

    def set_raw_proxies(self, proxies):
        """Set the batch of proxies to check and create a Redis client.

        :param proxies: iterable of proxy addresses in ``IP:PORT`` form.
        """
        self._raw_proxies = proxies
        # A fresh client is created for every batch.
        self._conn = RedisClient()

    async def test_proxy(self, proxy):
        """Request TEST_IP through ``proxy`` and store it if it answers 200.

        A failed request (connection error, timeout, ...) is reported on
        stdout and otherwise ignored.

        :param proxy: proxy address in ``IP:PORT`` form.
        """
        real_proxy = 'http://' + proxy
        print(real_proxy)
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(TEST_IP,
                                       proxy=real_proxy,
                                       timeout=TEST_PROXY_TIMEOUT) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
            except Exception:
                # ``Exception`` instead of a bare ``except`` so that
                # KeyboardInterrupt/SystemExit (and, on Python 3.8+, task
                # cancellation) are not swallowed.
                print('%s代理失效' % proxy)

    def test(self):
        """Validate every proxy set by ``set_raw_proxies``; block until done.

        Does nothing besides printing the start message when there are no
        proxies to check.
        """
        print('测试代理是否有效开始')
        # Each proxy is an ``IP:PORT`` string.
        proxies = list(self._raw_proxies or ())
        if not proxies:
            # Nothing to schedule; asyncio rejects an empty set of awaitables.
            return
        loop = asyncio.get_event_loop()
        tasks = [self.test_proxy(proxy) for proxy in proxies]
        # gather() accepts coroutine objects on every Python 3 version, while
        # asyncio.wait() refuses them from 3.11 on. return_exceptions=True
        # keeps one failing check from aborting the rest of the batch.
        loop.run_until_complete(
            asyncio.gather(*tasks, return_exceptions=True))
Example #2
0
class Tester(object):
    """Re-test the proxies held by the Redis client, batch by batch."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        A response whose status is in VALID_STATUS_CODES results in
        ``self.redis.max(proxy)``; any other status, or a failed request,
        results in ``self.redis.decrease(proxy)``.

        :param proxy: proxy address, ``str`` or ``bytes``
        :return: None
        """
        # NOTE(review): aiohttp 3.x deprecates ``verify_ssl=False`` in favour
        # of ``ssl=False``; switch once the minimum aiohttp version is known.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Decode bytes (UTF-8) to str in case the proxy read from the
                # database comes back as bytes.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)

            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main entry point: test all stored proxies in batches of
        BATCH_TEST_SIZE, pausing five seconds after each batch.

        Any exception raised while running is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                tasks = [
                    self.test_single_proxy(proxy) for proxy in test_proxies
                ]
                # An empty batch is skipped instead of aborting the whole
                # run with an error from asyncio.
                if tasks:
                    loop = asyncio.get_event_loop()
                    # gather() accepts coroutine objects on every Python 3
                    # version; asyncio.wait() refuses them from 3.11 on.
                    # return_exceptions=True keeps one failing check from
                    # cancelling the rest of the batch.
                    loop.run_until_complete(
                        asyncio.gather(*tasks, return_exceptions=True))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Example #3
0
 def valid_proxies(cycle=VALID_CHECK_CYCLE):
     """Re-check stored proxies forever, sleeping ``cycle`` between rounds.

     Each round asks the Redis client for half of its current queue length
     worth of proxies and hands them to a ValidateTester.

     :param cycle: seconds passed to ``time.sleep`` after every round.
     """
     redis_conn = RedisClient()
     validator = ValidateTester()
     while True:
         # Half of the queue, rounded down.
         half = int(redis_conn.queue_len * 0.5)
         validator.set_raw_proxies(redis_conn.get(half))
         validator.test()
         time.sleep(cycle)
Example #4
0
def cookie_test(id):
    """
    Run the cookie test for one stored cookie entry.

    Loads the entry saved under ``id`` in the ('taobao', 'cookie') store,
    converts it with ``cookies2cookie``, calls each data helper with the
    resulting cookie and saves the combined record in the
    ('taobao', 'result') store under the value returned by ``get_user``.

    :param id: key of the cookie entry
    :return: the value returned by ``get_info``
    """
    cookie_store = RedisClient('taobao', 'cookie')
    # NOTE(review): eval() executes whatever text is stored under this key;
    # confirm the stored format and consider a non-executing parser.
    parsed_cookies = eval(cookie_store.result(id))
    cookie = cookies2cookie(parsed_cookies)

    # The helpers are called in this fixed order with the same cookie.
    user = get_user(cookie)
    comment = get_comment(cookie)
    info = get_info(cookie)
    collected = get_collect(cookie)
    order = get_order(cookie)
    like = guess_you_like(cookie)

    record = {
        'id': user,
        '用户信息': info,
        '信誉明细': comment,
        '购物数据': order,
        '收藏信息': collected,
        '猜你喜欢': like,
    }
    RedisClient('taobao', 'result').set(user, record)
    return info
Example #5
0
class Getter():
    """Pull raw proxies from every crawler source and add them to Redis."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True once ``self.redis.count()`` reaches POOL_UPPER_THRESHOLD."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Crawl every source listed in the crawler's ``__CrawlName__``.

        Does nothing beyond printing the start message when the pool is
        already at or above the threshold.
        """
        print('获取器开始执行')
        if self.is_over_threshold():
            return
        for site_name in self.crawler.__CrawlName__:
            fetched = self.crawler.get_raw_proxies(site_name)
            sys.stdout.flush()
            for entry in fetched:
                self.redis.add(entry)
Example #6
0
 def check_pool(lower_threshold=POOL_LOWER_THRESHOLD,
                upper_threshold=POOL_UPPER_THRESHOLD,
                cycle=POOL_CHECK_CYCLE):
     """Refill the proxy pool forever whenever it runs low.

     :param lower_threshold: queue length below which a refill is triggered.
     :param upper_threshold: value handed to ``PoolAdder``.
     :param cycle: seconds passed to ``time.sleep`` after every check.
     """
     redis_conn = RedisClient()
     print('1')
     pool_adder = PoolAdder(upper_threshold)
     while True:
         pool_is_low = redis_conn.queue_len < lower_threshold
         if pool_is_low:
             # Queue is shorter than the lower threshold: crawl more proxies.
             pool_adder.add_to_queue()
         time.sleep(cycle)
Example #7
0
def qrcode(task_id):
    """
    Look up the QR code stored for a task id (task.id).

    :param task_id: key of the entry in the ('taobao', 'qrcode') store
    :return: JSON response with resultcode "200" and the evaluated entry, or
             resultcode "404" when the store returns None or the entry
             evaluates to None
    """
    conn = RedisClient('taobao', 'qrcode')
    raw = conn.result(task_id)
    # eval(None) raises TypeError, so a missing entry must be detected before
    # evaluating; otherwise the 404 branch is never reached for it.
    # NOTE(review): eval() executes whatever text is stored under this key;
    # confirm the stored format and consider a non-executing parser.
    result = eval(raw) if raw is not None else None
    if result is not None:
        response = {
            "resultcode": "200",
            "reason": "二维码获取成功",
            "result": result
        }
    else:
        response = {
            "resultcode": "404",
            "reason": "二维码获取失败",
            "result": None
        }
    return jsonify(response)
Example #8
0
def browser():
    """Create a login QR code, wait for the login and store the cookie.

    The QR code and a fresh uuid are saved in the ('taobao', 'qrcode') store
    under ``browser.request.id`` (presumably the id of the running Celery
    task — confirm). After ``verify_login``:

    * ``login_signal == 1``: the cookie is saved under the uuid, the
      'app.tasks.cookie_test' task is queued and True is returned;
    * ``login_signal == 0``: the failure message is printed and returned;
    * any other value: None is returned implicitly.
    """
    driver = Browser()
    qr_image = driver.create_qrcode()
    qr_store = RedisClient('taobao', 'qrcode')
    login_id = uuid.uuid1().hex
    payload = {'uuid': login_id, 'qrcode': qr_image}
    qr_store.set(browser.request.id, payload)

    driver.verify_login()

    if driver.login_signal == 1:
        cookie = driver.get_cookie()
        RedisClient('taobao', 'cookie').set(login_id, cookie)
        celery.send_task('app.tasks.cookie_test', args=(login_id, ))
        return True

    if driver.login_signal == 0:
        print('任务失败')
        return '任务失败'
Example #9
0
def find_user_info(user):
    """
    Return the stored result for ``user`` as a JSON response.

    :param user: key in the ('taobao', 'result') store; converted with str()
    :return: JSON response with resultcode "200" and the evaluated entry, or
             resultcode "404" when anything in the lookup raises
    """
    user = str(user)
    print(user)
    print(type(user))
    try:
        store = RedisClient('taobao', 'result')
        # NOTE(review): eval() executes whatever text is stored under this
        # key; confirm the stored format and consider a non-executing parser.
        record = eval(store.result(user))
        print(type(record))
        return jsonify({
            "resultcode": "200",
            "reason": "查询成功",
            "result": record
        })
    except Exception as e:
        print('查询失败', e)
        return jsonify({
            "resultcode": "404",
            "reason": "查询失败",
            "result": None
        })
Example #10
0
def get_redis_conn():
    """Return the RedisClient cached on ``g``, creating it on first use."""
    try:
        return g.redis_client
    except AttributeError:
        # No client has been attached to ``g`` yet: create and cache one.
        g.redis_client = RedisClient()
        return g.redis_client
Example #11
0
 def __init__(self, threshold):
     """Store ``threshold`` and create the helper objects this instance uses.

     :param threshold: kept as ``self._threshold``; presumably a pool-size
         limit — TODO confirm against the code that reads it.
     """
     self._threshold = threshold
     # One instance each of RedisClient, FreeProxyGetter and ValidateTester.
     self._conn = RedisClient()
     self._crawl = FreeProxyGetter()
     self._tester = ValidateTester()
Example #12
0
 def set_raw_proxies(self, proxies):
     """Store ``proxies`` as the raw proxy list and create a Redis client.

     :param proxies: kept as ``self._raw_proxies``.
     """
     # Sets two attributes.
     self._raw_proxies = proxies
     # A new RedisClient is instantiated on every call.
     self._conn = RedisClient()
Example #13
0
 def __init__(self):
     """Create the RedisClient and Crawler instances used by this object."""
     self.redis = RedisClient()
     self.crawler = Crawler()
Example #14
0
 def __init__(self):
     """Create the RedisClient instance used by this object."""
     self.redis = RedisClient()