コード例 #1
0
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[WebDriver, dict, Options,
                                          DriverOptions] = None,
                 session_or_options: Union[Session, dict,
                                           SessionOptions] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象       \n
        :param driver_or_options: driver对象或chrome设置,Options类或设置字典
        :param session_or_options: Session对象或设置
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._debugger = None
        self._proxy = proxy

        om = OptionsManager(
            ini_path
        ) if session_or_options is None or driver_or_options is None else None

        # ------------------处理session options----------------------
        if session_or_options is None:
            self._session_options = om.session_options

        else:
            # 若接收到Session对象,直接记录
            if isinstance(session_or_options, Session):
                self._session = session_or_options

            # 否则记录其配置信息
            else:
                self._session_options = _session_options_to_dict(
                    session_or_options)

        # ------------------处理driver options----------------------
        if driver_or_options is None:
            self._driver_options = om.chrome_options
            self._driver_options['driver_path'] = om.get_value(
                'paths', 'chromedriver_path')

        else:
            # 若接收到WebDriver对象,直接记录
            if isinstance(driver_or_options, WebDriver):
                self._driver = driver_or_options

            # 否则记录其配置信息
            else:
                self._driver_options = _chrome_options_to_dict(
                    driver_or_options)

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if not isinstance(self._driver_options, dict):
                raise TypeError('Driver options invalid')

            options = _dict_to_chrome_options(self._driver_options)

            if not self._driver_options.get('debugger_address',
                                            None) and self._proxy:
                options.add_argument(f'--proxy-server={self._proxy["http"]}')

            driver_path = self._driver_options.get('driver_path',
                                                   None) or 'chromedriver'
            chrome_path = self._driver_options.get('binary_location',
                                                   None) or 'chrome.exe'

            # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程-----------
            if options.debugger_address and _check_port(
                    options.debugger_address) is False:
                from subprocess import Popen
                port = options.debugger_address[options.debugger_address.
                                                rfind(':') + 1:]

                # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径
                chrome_path, self._debugger = _create_chrome(
                    chrome_path, port, self._driver_options['arguments'],
                    self._proxy)

            # -----------创建WebDriver对象-----------
            self._driver = _create_driver(chrome_path, driver_path, options)

            # 反反爬设置
            try:
                self._driver.execute_script(
                    'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'
                )
            except:
                pass

            # self._driver.execute_cdp_cmd(
            #     'Page.addScriptToEvaluateOnNewDocument',
            #     {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})

        return self._driver

    @property
    def driver_options(self) -> dict:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    @property
    def debugger_progress(self):
        """调试浏览器进程"""
        return self._debugger

    def kill_browser(self) -> None:
        """关闭浏览器进程(如果可以)"""
        if self.debugger_progress:
            self.debugger_progress.kill()
            return

        address = self.driver_options.get('debugger_address', '').split(':')
        if len(address) == 1:
            self.close_driver()

        elif len(address) == 2:
            ip, port = address
            if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
                return

            from os import popen
            progresses = popen(f'netstat -nao | findstr :{port}').read().split(
                '\n')
            txt = ''
            for progress in progresses:
                if 'LISTENING' in progress:
                    txt = progress
                    break

            if not txt:
                return

            pid = txt[txt.rfind(' ') + 1:]
            if popen(f'tasklist | findstr {pid}').read().lower().startswith(
                    'chrome.exe'):
                popen(f'taskkill /pid {pid} /F')

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # 添加cookie到session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x.lower() not in ('name', 'value', 'httponly', 'expiry',
                                         'samesite')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # 添加cookie到driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError(
                            'There is no domain name in the cookie or the browser has not visited a URL.'
                        )

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                # 避免selenium自动添加.后无法正确覆盖已有cookie
                if cookie['domain'][0] != '.':
                    c = self.driver.get_cookie(cookie['name'])
                    if c and c['domain'] == cookie['domain']:
                        self.driver.delete_cookie(cookie['name'])

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        """根据传入字典对session进行设置    \n
        :param data: session配置字典
        :return: None
        """
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            self.user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def user_agent_to_session(self,
                              driver: WebDriver = None,
                              session: Session = None) -> None:
        """把driver的user-agent复制到session    \n
        :param driver: 来源driver对象
        :param session: 目标session对象
        :return: None
        """
        driver = driver or self.driver
        session = session or self.session
        selenium_user_agent = driver.execute_script(
            "return navigator.userAgent;")
        session.headers.update({"User-Agent": selenium_user_agent})

    def close_driver(self) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            self._driver.quit()
            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass
コード例 #2
0
ファイル: drission.py プロジェクト: g1879/DrissionPage
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[RemoteWebDriver, Options,
                                          DriverOptions, bool] = None,
                 session_or_options: Union[Session, dict, SessionOptions,
                                           bool] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象                     \n
        :param driver_or_options: driver对象或DriverOptions、Options类,传入False则创建空配置对象
        :param session_or_options: Session对象或设置字典,传入False则创建空配置对象
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._session_options = None
        self._driver_options = None
        self._debugger = None
        self._proxy = proxy

        # ------------------处理session options----------------------
        if session_or_options is None:
            self._session_options = SessionOptions(ini_path=ini_path).as_dict()

        elif session_or_options is False:
            self._session_options = SessionOptions(read_file=False).as_dict()

        elif isinstance(session_or_options, Session):
            self._session = session_or_options

        elif isinstance(session_or_options, SessionOptions):
            self._session_options = session_or_options.as_dict()

        elif isinstance(session_or_options, dict):
            self._session_options = session_or_options

        else:
            raise TypeError(
                'session_or_options参数只能接收Session, dict, SessionOptions或False。')

        # ------------------处理driver options----------------------
        if driver_or_options is None:
            self._driver_options = DriverOptions(ini_path=ini_path)

        elif driver_or_options is False:
            self._driver_options = DriverOptions(read_file=False)

        elif isinstance(driver_or_options, RemoteWebDriver):
            self._driver = driver_or_options

        elif isinstance(driver_or_options, (Options, DriverOptions)):
            self._driver_options = driver_or_options

        else:
            raise TypeError(
                'driver_or_options参数只能接收WebDriver, Options, DriverOptions或False。'
            )

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if not self.driver_options.debugger_address and self._proxy:
                self.driver_options.add_argument(
                    f'--proxy-server={self._proxy["http"]}')

            driver_path = self.driver_options.driver_path or 'chromedriver'
            chrome_path = self.driver_options.binary_location or 'chrome.exe'

            # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程-----------
            if self.driver_options.debugger_address and _check_port(
                    self.driver_options.debugger_address) is False:
                from subprocess import Popen
                port = self.driver_options.debugger_address.split(':')[-1]

                # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径
                chrome_path, self._debugger = _create_chrome(
                    chrome_path, port, self.driver_options.arguments,
                    self._proxy)

            # -----------创建WebDriver对象-----------
            self._driver = _create_driver(chrome_path, driver_path,
                                          self.driver_options)

            # 反反爬设置
            try:
                self._driver.execute_script(
                    'Object.defineProperty(navigator,"webdriver",{get:() => undefined,});'
                )
            except Exception:
                pass

            # self._driver.execute_cdp_cmd(
            #     'Page.addScriptToEvaluateOnNewDocument',
            #     {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})

        return self._driver

    @property
    def driver_options(self) -> Union[DriverOptions, Options]:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    @property
    def debugger_progress(self):
        """调试浏览器进程"""
        return self._debugger

    def kill_browser(self) -> None:
        """关闭浏览器进程(如果可以)"""
        if self.debugger_progress:
            self.debugger_progress.kill()
            return

        pid = self.get_browser_progress_id()
        from os import popen
        from platform import system

        if pid and system().lower() == 'windows' \
                and popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'):
            popen(f'taskkill /pid {pid} /F')

        else:
            self._driver.quit()

    def get_browser_progress_id(self) -> Union[str, None]:
        """获取浏览器进程id"""
        if self.debugger_progress:
            return self.debugger_progress.pid

        address = str(self.driver_options.debugger_address).split(':')
        if len(address) == 2:
            ip, port = address
            if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
                return None

            from os import popen
            txt = ''
            progresses = popen(f'netstat -nao | findstr :{port}').read().split(
                '\n')
            for progress in progresses:
                if 'LISTENING' in progress:
                    txt = progress
                    break
            if not txt:
                return None

            return txt.split(' ')[-1]

    def hide_browser(self) -> None:
        """隐藏浏览器界面"""
        self._show_or_hide_browser()

    def show_browser(self) -> None:
        """显示浏览器界面"""
        self._show_or_hide_browser(False)

    def _show_or_hide_browser(self, hide: bool = True) -> None:
        from platform import system
        if system().lower() != 'windows':
            raise OSError('该方法只能在Windows系统使用。')

        try:
            from win32gui import ShowWindow
            from win32con import SW_HIDE, SW_SHOW
        except ImportError:
            raise ImportError('请先安装:pip install pypiwin32')

        pid = self.get_browser_progress_id()
        if not pid:
            print(
                '只有设置了debugger_address参数才能使用 show_browser() 和 hide_browser()')
            return
        hds = _get_chrome_hwnds_from_pid(pid)
        sw = SW_HIDE if hide else SW_SHOW
        for hd in hds:
            ShowWindow(hd, sw)

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # 添加cookie到session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x.lower() not in ('name', 'value', 'httponly', 'expiry',
                                         'samesite')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # 添加cookie到driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError('cookie中没有域名或浏览器未访问过URL。')

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                # 避免selenium自动添加.后无法正确覆盖已有cookie
                if cookie['domain'][0] != '.':
                    c = self.driver.get_cookie(cookie['name'])
                    if c and c['domain'] == cookie['domain']:
                        self.driver.delete_cookie(cookie['name'])

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        """根据传入字典对session进行设置    \n
        :param data: session配置字典
        :return: None
        """
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def close_driver(self, kill: bool = False) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            if kill:
                self.kill_browser()
            else:
                self._driver.quit()

            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()
コード例 #3
0
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[WebDriver, dict, Options,
                                          DriverOptions] = None,
                 session_or_options: Union[Session, dict,
                                           SessionOptions] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象       \n
        :param driver_or_options: driver对象或chrome设置,Options类或设置字典
        :param session_or_options: Session对象或设置
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._proxy = proxy

        om = OptionsManager(
            ini_path
        ) if session_or_options is None or driver_or_options is None else None

        # ------------------处理session options----------------------
        if session_or_options is None:
            self._session_options = om.session_options

        else:
            # 若接收到Session对象,直接记录
            if isinstance(session_or_options, Session):
                self._session = session_or_options

            # 否则记录其配置信息
            else:
                self._session_options = _session_options_to_dict(
                    session_or_options)

        # ------------------处理driver options----------------------
        if driver_or_options is None:
            self._driver_options = om.chrome_options
            self._driver_options['driver_path'] = om.get_value(
                'paths', 'chromedriver_path')

        else:
            # 若接收到WebDriver对象,直接记录
            if isinstance(driver_or_options, WebDriver):
                self._driver = driver_or_options

            # 否则记录其配置信息
            else:
                self._driver_options = _chrome_options_to_dict(
                    driver_or_options)

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if isinstance(self._driver_options, dict):
                options = _dict_to_chrome_options(self._driver_options)
            else:
                raise TypeError('Driver options invalid')

            if self._proxy:
                options.add_argument(f'--proxy-server={self._proxy["http"]}')

            driver_path = self._driver_options.get('driver_path',
                                                   None) or 'chromedriver'

            try:
                if options.debugger_address and _check_port(
                        options.debugger_address) is False:
                    from subprocess import Popen
                    port = options.debugger_address.split(':')[-1]
                    chrome_path = self._driver_options.get(
                        'binary_location', None) or 'chrome.exe'
                    Popen(f'{chrome_path} --remote-debugging-port={port}',
                          shell=False)

                self._driver = webdriver.Chrome(driver_path, options=options)

            except SessionNotCreatedException:
                print(
                    'Chrome版本与chromedriver版本不匹配,可执行easy_set.get_match_driver()自动下载匹配的版本。'
                )
                exit(0)

            # 反爬设置,似乎没用
            self._driver.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument", {
                    "source":
                    """
                Object.defineProperty(navigator, 'webdriver', {
                  get: () => Chrome
                })
              """
                })

        return self._driver

    @property
    def driver_options(self) -> dict:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # 添加cookie到session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x not in ('name', 'value', 'httpOnly', 'expiry')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # 添加cookie到driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError(
                            'There is no domain name in the cookie or the browser has not visited a URL.'
                        )

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            self.user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def user_agent_to_session(self,
                              driver: WebDriver = None,
                              session: Session = None) -> None:
        """把driver的user-agent复制到session    \n
        :param driver: 来源driver对象
        :param session: 目标session对象
        :return: None
        """
        driver = driver or self.driver
        session = session or self.session
        selenium_user_agent = driver.execute_script(
            "return navigator.userAgent;")
        session.headers.update({"User-Agent": selenium_user_agent})

    def close_driver(self) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            self._driver.quit()
            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass
コード例 #4
0
class Drission(object):
    """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色"""
    def __init__(self,
                 driver_or_options: Union[WebDriver, dict, Options,
                                          DriverOptions] = None,
                 session_or_options: Union[Session, dict,
                                           SessionOptions] = None,
                 ini_path: str = None,
                 proxy: dict = None):
        """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象       \n
        :param driver_or_options: driver对象或chrome设置,Options类或设置字典
        :param session_or_options: Session对象或设置
        :param ini_path: ini文件路径
        :param proxy: 代理设置
        """
        self._session = None
        self._driver = None
        self._debugger = None
        self._proxy = proxy

        om = OptionsManager(
            ini_path
        ) if session_or_options is None or driver_or_options is None else None

        # ------------------处理session options----------------------
        if session_or_options is None:
            self._session_options = om.session_options

        else:
            # 若接收到Session对象,直接记录
            if isinstance(session_or_options, Session):
                self._session = session_or_options

            # 否则记录其配置信息
            else:
                self._session_options = _session_options_to_dict(
                    session_or_options)

        # ------------------处理driver options----------------------
        if driver_or_options is None:
            self._driver_options = om.chrome_options
            self._driver_options['driver_path'] = om.get_value(
                'paths', 'chromedriver_path')

        else:
            # 若接收到WebDriver对象,直接记录
            if isinstance(driver_or_options, WebDriver):
                self._driver = driver_or_options

            # 否则记录其配置信息
            else:
                self._driver_options = _chrome_options_to_dict(
                    driver_or_options)

    @property
    def session(self) -> Session:
        """返回Session对象,如未初始化则按配置信息创建"""
        if self._session is None:
            self._set_session(self._session_options)

            if self._proxy:
                self._session.proxies = self._proxy

        return self._session

    @property
    def driver(self) -> WebDriver:
        """返回WebDriver对象,如未初始化则按配置信息创建。         \n
        如设置了本地调试浏览器,可自动接入或打开浏览器进程。
        """
        if self._driver is None:
            if isinstance(self._driver_options, dict):
                options = _dict_to_chrome_options(self._driver_options)
            else:
                raise TypeError('Driver options invalid')

            if self._proxy:
                options.add_argument(f'--proxy-server={self._proxy["http"]}')

            driver_path = self._driver_options.get('driver_path',
                                                   None) or 'chromedriver'
            chrome_path = self._driver_options.get('binary_location',
                                                   None) or 'chrome.exe'

            # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程-----------
            if options.debugger_address and _check_port(
                    options.debugger_address) is False:
                from subprocess import Popen
                port = options.debugger_address[options.debugger_address.
                                                rfind(':') + 1:]

                try:
                    self._debugger = Popen(
                        f'{chrome_path} --remote-debugging-port={port}',
                        shell=False)

                    if chrome_path == 'chrome.exe':
                        from common import get_exe_path_from_port
                        chrome_path = get_exe_path_from_port(port)

                # 启动不了进程,主动找浏览器执行文件启动
                except FileNotFoundError:
                    from DrissionPage.easy_set import _get_chrome_path
                    chrome_path = _get_chrome_path(show_msg=False)

                    if not chrome_path:
                        raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。')

                    self._debugger = Popen(
                        f'"{chrome_path}" --remote-debugging-port={port}',
                        shell=False)

            # -----------创建WebDriver对象-----------
            try:
                self._driver = webdriver.Chrome(driver_path, options=options)

            # 若版本不对,获取对应chromedriver再试
            except (WebDriverException, SessionNotCreatedException):
                from .easy_set import get_match_driver
                chrome_path = None if chrome_path == 'chrome.exe' else chrome_path
                driver_path = get_match_driver(chrome_path=chrome_path,
                                               check_version=False,
                                               show_msg=False)

                if driver_path:
                    try:
                        self._driver = webdriver.Chrome(driver_path,
                                                        options=options)
                    except:
                        print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                        exit(0)

                # 当找不到driver且chrome_path为None时,说明安装的版本过高,改在系统路径中查找
                elif chrome_path is None and driver_path is None:
                    from DrissionPage.easy_set import _get_chrome_path
                    chrome_path = _get_chrome_path(show_msg=False,
                                                   from_ini=False,
                                                   from_regedit=False)
                    driver_path = get_match_driver(chrome_path=chrome_path,
                                                   check_version=False,
                                                   show_msg=False)

                    if driver_path:
                        options.binary_location = chrome_path
                        try:
                            self._driver = webdriver.Chrome(driver_path,
                                                            options=options)
                        except:
                            print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                            exit(0)
                    else:
                        print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                        exit(0)
                else:
                    print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
                    exit(0)

            # 反反爬设置
            try:
                self._driver.execute_script(
                    'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'
                )
            except:
                pass

            # self._driver.execute_cdp_cmd(
            #     'Page.addScriptToEvaluateOnNewDocument',
            #     {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})

        return self._driver

    @property
    def debugger_progress(self):
        """调试浏览器进程"""
        return self._debugger

    @property
    def driver_options(self) -> dict:
        """返回driver配置信息"""
        return self._driver_options

    @property
    def session_options(self) -> dict:
        """返回session配置信息"""
        return self._session_options

    @session_options.setter
    def session_options(self, options: Union[dict, SessionOptions]) -> None:
        """设置session配置                  \n
        :param options: session配置字典
        :return: None
        """
        self._session_options = _session_options_to_dict(options)
        self._set_session(self._session_options)

    @property
    def proxy(self) -> Union[None, dict]:
        """返回代理信息"""
        return self._proxy

    @proxy.setter
    def proxy(self, proxies: dict = None) -> None:
        """设置代理信息                \n
        :param proxies: 代理信息字典
        :return: None
        """
        self._proxy = proxies

        if self._session:
            self._session.proxies = proxies

        if self._driver:
            cookies = self._driver.get_cookies()
            url = self._driver.current_url
            self._driver.quit()
            self._driver = None
            self._driver = self.driver
            self._driver.get(url)

            for cookie in cookies:
                self.set_cookies(cookie, set_driver=True)

    def set_cookies(self,
                    cookies: Union[RequestsCookieJar, list, tuple, str, dict],
                    set_session: bool = False,
                    set_driver: bool = False) -> None:
        """设置cookies                                                      \n
        :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict
        :param set_session: 是否设置session的cookies
        :param set_driver: 是否设置driver的cookies
        :return: None
        """
        cookies = _cookies_to_tuple(cookies)

        for cookie in cookies:
            if cookie['value'] is None:
                cookie['value'] = ''

            # 添加cookie到session
            if set_session:
                kwargs = {
                    x: cookie[x]
                    for x in cookie
                    if x.lower() not in ('name', 'value', 'httponly', 'expiry',
                                         'samesite')
                }

                if 'expiry' in cookie:
                    kwargs['expires'] = cookie['expiry']

                self.session.cookies.set(cookie['name'], cookie['value'],
                                         **kwargs)

            # 添加cookie到driver
            if set_driver:
                if 'expiry' in cookie:
                    cookie['expiry'] = int(cookie['expiry'])

                try:
                    browser_domain = extract(self.driver.current_url).fqdn
                except AttributeError:
                    browser_domain = ''

                if not cookie.get('domain', None):
                    if browser_domain:
                        url = extract(browser_domain)
                        cookie_domain = f'{url.domain}.{url.suffix}'
                    else:
                        raise ValueError(
                            'There is no domain name in the cookie or the browser has not visited a URL.'
                        )

                    cookie['domain'] = cookie_domain

                else:
                    cookie_domain = cookie['domain'] if cookie['domain'][
                        0] != '.' else cookie['domain'][1:]

                if cookie_domain not in browser_domain:
                    self.driver.get(cookie_domain if cookie_domain.startswith(
                        'http://') else f'http://{cookie_domain}')

                # 避免selenium自动添加.后无法正确覆盖已有cookie
                if cookie['domain'][0] != '.':
                    c = self.driver.get_cookie(cookie['name'])
                    if c and c['domain'] == cookie['domain']:
                        self.driver.delete_cookie(cookie['name'])

                self.driver.add_cookie(cookie)

    def _set_session(self, data: dict) -> None:
        if self._session is None:
            self._session = Session()

        attrs = [
            'headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert',
            'stream', 'trust_env', 'max_redirects'
        ]  # , 'adapters'

        if 'cookies' in data:
            self.set_cookies(data['cookies'], set_session=True)

        for i in attrs:
            if i in data:
                self._session.__setattr__(i, data[i])

    def cookies_to_session(self, copy_user_agent: bool = False) -> None:
        """把driver对象的cookies复制到session对象    \n
        :param copy_user_agent: 是否复制ua信息
        :return: None
        """
        if copy_user_agent:
            self.user_agent_to_session(self.driver, self.session)

        self.set_cookies(self.driver.get_cookies(), set_session=True)

    def cookies_to_driver(self, url: str) -> None:
        """把session对象的cookies复制到driver对象  \n
        :param url: 作用域
        :return: None
        """
        browser_domain = extract(self.driver.current_url).fqdn
        ex_url = extract(url)

        if ex_url.fqdn not in browser_domain:
            self.driver.get(url)

        domain = f'{ex_url.domain}.{ex_url.suffix}'

        cookies = []
        for cookie in self.session.cookies:
            if cookie.domain == '':
                cookie.domain = domain

            if domain in cookie.domain:
                cookies.append(cookie)

        self.set_cookies(cookies, set_driver=True)

    def user_agent_to_session(self,
                              driver: WebDriver = None,
                              session: Session = None) -> None:
        """把driver的user-agent复制到session    \n
        :param driver: 来源driver对象
        :param session: 目标session对象
        :return: None
        """
        driver = driver or self.driver
        session = session or self.session
        selenium_user_agent = driver.execute_script(
            "return navigator.userAgent;")
        session.headers.update({"User-Agent": selenium_user_agent})

    def close_driver(self) -> None:
        """关闭driver和浏览器"""
        if self._driver:
            self._driver.quit()
            self._driver = None

    def close_session(self) -> None:
        """关闭session"""
        if self._session:
            self._session.close()
            self._session = None

    def close(self) -> None:
        """关闭session、driver和浏览器"""
        if self._driver:
            self.close_driver()

        if self._session:
            self.close_session()

    def __del__(self):
        """关闭对象时关闭浏览器和Session"""
        try:
            self.close()
        except ImportError:
            pass