コード例 #1
0
ファイル: cookie_pool.py プロジェクト: kopa-kongpan/feapder
class PageCookiePool(CookiePoolInterface):
    """
    Pool of cookies obtained simply by visiting a page (no user login needed).
    """

    def __init__(
        self,
        redis_key,
        page_url=None,
        min_cookies=10000,
        must_contained_keys=(),
        keep_alive=False,
        **kwargs,
    ):
        """
        @param redis_key: project name, used as the prefix of the redis keys
        @param page_url: url that is visited to produce a cookie
        @param min_cookies: minimum number of cookies to keep in the pool
        @param must_contained_keys: keys a produced cookie must contain
        @param keep_alive: whether to stay on standby and keep producing once
            the pool is full. False means exit as soon as the pool is full
        ---
        @param kwargs: options forwarded to WebDriver
            load_images: whether to load images
            user_agent_pool: user-agent pool, not used when None
            proxies_pool: proxy pool, not used when None
            headless: whether to run in headless mode
            driver_type: web driver type
            timeout: request timeout, documented default 16s
            window_size: screen resolution (width, height)

        """

        self._redisdb = RedisDB()

        # Redis key of the list that holds the cookies.
        self._tab_cookie_pool = "{}:l_cookie_pool".format(redis_key)
        # Redis key remembering when the cookie count was last sampled,
        # stored as "timestamp:count".
        self._tab_cookie_pool_last_count = "{}:str_cookie_pool_count".format(
            redis_key
        )

        self._page_url = page_url
        self._min_cookies = min_cookies
        self._must_contained_keys = must_contained_keys
        self._keep_alive = keep_alive

        # Fill in the WebDriver defaults only when the caller did not set them.
        kwargs.setdefault("load_images", False)
        kwargs.setdefault("headless", True)
        self._kwargs = kwargs

    def create_cookie(self):
        """
        Open the page in a WebDriver and collect its cookies.
        May be overridden by subclasses.
        @return: dict of cookie name -> value, or None when one of the
            required keys is missing
        """
        with WebDriver(**self._kwargs) as driver:
            driver.get(self._page_url)

            cookies_json = {
                item["name"]: item["value"] for item in driver.get_cookies()
            }

            has_required_keys = all(
                key in cookies_json for key in self._must_contained_keys
            )
            if has_required_keys:
                return cookies_json

            log.error("获取cookie失败 cookies = {}".format(cookies_json))
            return None

    def add_cookies(self, cookies):
        """
        Log the cookies and push them onto the redis cookie list.
        @param cookies: cookies to store
        @return:
        """
        log.info("添加cookie {}".format(cookies))
        self._redisdb.lpush(self._tab_cookie_pool, cookies)

    def _save_count_snapshot(self, cookie_count):
        """
        Record the current time and cookie count as "timestamp:count".
        @param cookie_count: number of cookies currently in the pool
        @return:
        """
        self._redisdb.strset(
            self._tab_cookie_pool_last_count,
            "{}:{}".format(time.time(), cookie_count),
        )

    def run(self):
        """
        Produce cookies until the pool holds at least `min_cookies`.

        Once the pool is full the loop ends, unless `keep_alive` is set; in
        that case it sleeps and re-checks, and ends only when the pool size
        has not changed for more than 60 seconds.
        @return:
        """
        while True:
            try:
                now_cookie_count = self._redisdb.lget_count(self._tab_cookie_pool)
                missing = self._min_cookies - now_cookie_count

                if missing > 0:
                    log.info(
                        "当前cookie数为 {} 小于 {}, 生产cookie".format(
                            now_cookie_count, self._min_cookies
                        )
                    )
                    # A failed production attempt is logged and retried on the
                    # next pass of the loop.
                    try:
                        produced = self.create_cookie()
                        if produced:
                            self.add_cookies(produced)
                    except Exception as e:
                        log.exception(e)
                    continue

                log.info("当前cookie数为 {} 数量足够 暂不生产".format(now_cookie_count))

                # If the pool size has not changed for over a minute, assume
                # no spider is consuming cookies any more and stop producing.
                snapshot = self._redisdb.strget(self._tab_cookie_pool_last_count)
                if snapshot:
                    last_time, last_count = snapshot.split(":")
                    last_time = float(last_time)
                    last_count = int(last_count)

                    if time.time() - last_time > 60:
                        if now_cookie_count == last_count:
                            log.info("近一分钟,cookie池数量无变化,判定爬虫未使用,退出生产")
                            break
                        self._save_count_snapshot(now_cookie_count)
                else:
                    self._save_count_snapshot(now_cookie_count)

                if not self._keep_alive:
                    break

                log.info("sleep 10")
                tools.delay_time(10)

            except Exception as e:
                log.exception(e)
                tools.delay_time(1)

    def get_cookie(self, wait_when_null=True):
        """
        Take a cookie from the pool.
        @param wait_when_null: when the pool is empty, produce a cookie and
            retry instead of returning an empty dict
        @return: the cookie, or {} when the pool is empty and
            wait_when_null is false
        """
        while True:
            try:
                cookie_info = self._redisdb.rpoplpush(self._tab_cookie_pool)

                if cookie_info:
                    # NOTE(review): eval() runs whatever text is stored in
                    # redis; confirm the list is only written by trusted code.
                    return eval(cookie_info)

                if not wait_when_null:
                    return {}

                log.info("暂无cookie 生产中...")
                # Switch this instance to "produce one cookie, then return".
                self._keep_alive = False
                self._min_cookies = 1
                with RedisLock(
                    key=self._tab_cookie_pool, lock_timeout=3600, wait_timeout=5
                ) as _lock:
                    # Produce only when this caller got the lock.
                    if _lock.locked:
                        self.run()
            except Exception as e:
                log.exception(e)
                tools.delay_time(1)

    def del_cookie(self, cookies):
        """
        Remove the given cookies from the redis cookie list.
        @param cookies: cookies to remove
        @return:
        """
        self._redisdb.lrem(self._tab_cookie_pool, cookies)
コード例 #2
0
ファイル: test_redis.py プロジェクト: zhouxinfei/feapder
# -*- coding: utf-8 -*-
"""
Created on 2021/3/4 11:01 下午
---------
@summary:
---------
@author: Boris
@email: [email protected]
"""

from feapder.db.redisdb import RedisDB

# Client for the redis server at localhost:6379, database 0.
redis = RedisDB(ip_ports="localhost:6379", db=0)

# Push the two sample values onto the "l_test" list.
for _value in (2, 3):
    redis.lpush("l_test", _value)

# Print the list, the result of removing the value 2, and the list again.
print(redis.lrange("l_test"))
print(redis.lrem("l_test", 2))
print(redis.lrange("l_test"))
コード例 #3
0
ファイル: cookie_pool.py プロジェクト: kopa-kongpan/feapder
class LoginCookiePool(CookiePoolInterface):
    """
    Cookie pool for sites that require login. Account names, passwords and
    state flags are kept in a mysql table; cookies are kept in a redis list.
    """

    def __init__(
        self,
        redis_key,
        *,
        table_userbase,
        login_state_key="login_state",
        lock_state_key="lock_state",
        username_key="username",
        password_key="password",
        login_retry_times=10,
    ):
        """
        @param redis_key: project name, used as the prefix of the redis key
        @param table_userbase: name of the user table
        @param login_state_key: column name of the login state
        @param lock_state_key: column name of the locked (banned) state
        @param username_key: column name of the login name
        @param password_key: column name of the password
        @param login_retry_times: number of login attempts per user when
            create_cookie raises
        """

        self._tab_cookie_pool = "{}:l_cookie_pool".format(redis_key)
        self._login_retry_times = login_retry_times
        self._table_userbase = table_userbase
        self._login_state_key = login_state_key
        self._lock_state_key = lock_state_key
        self._username_key = username_key
        self._password_key = password_key

        self._redisdb = RedisDB()
        self._mysqldb = MysqlDB()

        # Make sure the user table exists before the pool is used.
        self.create_userbase()

    def create_userbase(self):
        """
        Create the user table if it does not exist yet.
        @return:
        """
        # The unique index is built on the configured username column rather
        # than on a hard-coded `username`, which does not exist when a custom
        # username_key is used. The statement is unchanged for the default.
        sql = f"""
            CREATE TABLE IF NOT EXISTS `{self._table_userbase}` (
              `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
              `{self._username_key}` varchar(50) DEFAULT NULL COMMENT '用户名',
              `{self._password_key}` varchar(255) DEFAULT NULL COMMENT '密码',
              `{self._login_state_key}` int(11) DEFAULT '0' COMMENT '登录状态(0未登录 1已登录)',
              `{self._lock_state_key}` int(11) DEFAULT '0' COMMENT '账号是否被封(0 未封 1 被封)',
              PRIMARY KEY (`id`),
              UNIQUE KEY `{self._username_key}` (`{self._username_key}`) USING BTREE
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """
        self._mysqldb.execute(sql)

    def create_cookie(self, username, password):
        """
        Log in and create a cookie. Must be implemented by subclasses.
        @param username: user name
        @param password: password
        @return: cookie / None
        """
        raise NotImplementedError

    def get_user_info(self):
        """
        Fetch the users that are neither locked nor already logged in.
        @return: the result of the mysql query; run() iterates it as
            (username, password) pairs
        """

        sql = "select {username_key}, {password_key} from {table_userbase} where {lock_state_key} != 1 and {login_state_key} != 1".format(
            username_key=self._username_key,
            password_key=self._password_key,
            table_userbase=self._table_userbase,
            lock_state_key=self._lock_state_key,
            login_state_key=self._login_state_key,
        )

        return self._mysqldb.find(sql)

    def handle_login_failed_user(self, username, password):
        """
        Hook called for a user whose login failed. Does nothing by default.
        @param username:
        @param password:
        @return:
        """

        pass

    def handel_exception(self, e):
        """
        Hook called when create_cookie raises. Logs the exception by default.
        @param e: the raised exception
        @return:
        """
        log.exception(e)

    def save_cookie(self, username, cookie):
        """
        Push the user's cookie onto the redis list and mark the user as
        logged in (login state 1) in mysql.
        @param username: user name
        @param cookie: cookie created for that user
        @return:
        """
        user_cookie = {"username": username, "cookie": cookie}

        self._redisdb.lpush(self._tab_cookie_pool, user_cookie)

        # NOTE(review): username is interpolated directly into the SQL text;
        # a value containing a quote would break the statement. Parameterize
        # if MysqlDB supports it - TODO confirm.
        sql = "update {table_userbase} set {login_state_key} = 1 where {username_key} = '{username}'".format(
            table_userbase=self._table_userbase,
            login_state_key=self._login_state_key,
            username_key=self._username_key,
            username=username,
        )

        self._mysqldb.update(sql)

    def get_cookie(self, wait_when_null=True) -> User:
        """
        Take a user cookie from the pool.
        @param wait_when_null: when the pool is empty, run the login flow and
            retry instead of returning None
        @return: User built from the stored entry, or None when the pool is
            empty and wait_when_null is false
        """
        while True:
            try:
                user_cookie = self._redisdb.rpoplpush(self._tab_cookie_pool)
                if not user_cookie and wait_when_null:
                    log.info("暂无cookie 生产中...")
                    self.login()
                    continue

                if user_cookie:
                    # NOTE(review): eval() runs whatever text is stored in
                    # redis; confirm the list is only written by trusted code.
                    user_cookie = eval(user_cookie)
                    return User(**user_cookie)

                return None
            except Exception as e:
                log.exception(e)
                tools.delay_time(1)

    def del_cookie(self, user: User):
        """
        Delete an invalid cookie and reset the user's login state to 0.
        @param user: user whose cookie should be removed
        @return:
        """
        # Rebuilt with the same keys save_cookie stores, so that lrem is
        # given the same entry that was pushed.
        user_info = {"username": user.username, "cookie": user.cookie}
        self._redisdb.lrem(self._tab_cookie_pool, user_info)

        sql = "update {table_userbase} set {login_state_key} = 0 where {username_key} = '{username}'".format(
            table_userbase=self._table_userbase,
            login_state_key=self._login_state_key,
            username_key=self._username_key,
            username=user.username,
        )

        self._mysqldb.update(sql)

    def user_is_locked(self, user: User):
        """
        Mark the user as locked (lock state 1) in mysql.
        @param user: user to mark
        @return:
        """
        sql = "update {table_userbase} set {lock_state_key} = 1 where {username_key} = '{username}'".format(
            table_userbase=self._table_userbase,
            lock_state_key=self._lock_state_key,
            username_key=self._username_key,
            username=user.username,
        )

        self._mysqldb.update(sql)

    def run(self):
        """
        Log in every available user and store the resulting cookies.

        Work is done only when the redis lock is obtained. For each user,
        create_cookie is retried up to login_retry_times while it raises;
        a falsy cookie or exhausted retries call handle_login_failed_user.
        @return:
        """
        with RedisLock(
            key=self._tab_cookie_pool, lock_timeout=3600, wait_timeout=100
        ) as _lock:
            if _lock.locked:
                user_infos = self.get_user_info()
                if not isinstance(user_infos, Iterable):
                    raise ValueError("get_user_info 返回值必须可迭代")

                if not user_infos:
                    log.info("无可用用户")

                for username, password in user_infos:
                    for _ in range(self._login_retry_times):
                        try:
                            cookie = self.create_cookie(username, password)
                            if cookie:
                                self.save_cookie(username, cookie)
                            else:
                                self.handle_login_failed_user(username, password)

                            # create_cookie did not raise: stop retrying.
                            break
                        except Exception as e:
                            self.handel_exception(e)

                    else:
                        # Every attempt raised.
                        self.handle_login_failed_user(username, password)

    # Alias used by get_cookie.
    login = run