Ejemplo n.º 1
0
def get_browser(headless: bool = True,
                browser_class: int = 1,
                init_args: dict = None) -> Firefox:
    """
    Launch and return a Selenium browser instance.

    If the browser fails to start, a notification mail is sent through
    ``send_mail``, the exception is logged, and the original exception is
    re-raised to the caller.

    :param headless: when True, the browser is started with ``--headless``.
    :param browser_class: browser type; 1 selects Firefox, any other value
        (0 by convention) selects Chrome. Per the original author's note,
        Chrome cannot be used on the server side.
    :param init_args: optional dict of extra Firefox profile preferences
        (preference name -> value). Ignored unless it is a dict; not used
        for Chrome.
    :return: the started ``Firefox`` instance, or a ``Chrome`` instance when
        ``browser_class`` is not 1.
    :raises Exception: whatever the driver constructor raised.

    Setup notes (translated from the original author):

    * Install selenium with ``pip3 install selenium``.
    * A headless browser's language follows the operating system, so the
      browser's initialisation parameters must be set here to make sure the
      scraped data comes back in the right language.
    * To use headless mode, install the regular version of the browser first.
    * Firefox: download geckodriver from
      https://github.com/mozilla/geckodriver/releases, unpack the
      ``geckodriver`` file, copy it to ``/usr/local/bin`` and make it
      executable: ``sudo chmod +x /usr/local/bin/geckodriver``.
    * Chrome: chromedriver is available at
      https://chromedriver.storage.googleapis.com/index.html?path=2.35/ ;
      unpack the executable and put it in Chrome's directory (usually
      ``/opt/google/chrome/`` on Ubuntu).
    * Reportedly, headless Chrome raises an error when run as root, while
      headless Firefox does not.
    """
    if browser_class == 1:
        browser_name = "Firefox"
        driver_factory = Firefox
        profile = FirefoxProfile()
        # Pin the accepted language instead of inheriting it from the OS.
        profile.set_preference("intl.accept_languages", "zh-cn")
        if isinstance(init_args, dict):
            for pref_name, pref_value in init_args.items():
                profile.set_preference(pref_name, pref_value)
        launch_kwargs = {
            "firefox_profile": profile,
            "executable_path": firefox_driver,
        }
        if headless:
            # Options are only needed (and only passed) in headless mode.
            options = FirefoxOptions()
            options.add_argument("--headless")
            launch_kwargs["firefox_options"] = options
    else:
        browser_name = "Chrome"
        driver_factory = Chrome
        options = ChromeOptions()
        options.add_experimental_option("excludeSwitches",
                                        ["ignore-certificate-errors"])
        if headless:
            options.add_argument("--headless")
        launch_kwargs = {
            "executable_path": chrome_driver,
            "chrome_options": options,
        }

    try:
        browser = driver_factory(**launch_kwargs)
    except Exception as e:
        # Only mention "headless" in the alert when that mode was requested,
        # so the notification reflects what actually failed.
        mode = " headless" if headless else ""
        title = "{} {}{}浏览器打开失败".format(
            datetime.datetime.now(), browser_name, mode)
        content = "错误原因是:{}".format(e)
        send_mail(title=title, content=content)  # project-defined helper
        logger.exception(e)
        raise
    return browser
Ejemplo n.º 2
0
 def generate_download_folder_capability_by_firefox(self):
     """
     Build Firefox options carrying the preferences stored in ``self._prefs``.

     :return: a ``FirefoxOptions`` object with every entry of ``self._prefs``
         applied as a Firefox preference.
     """
     options = FirefoxOptions()
     # Selenium's Firefox options have no add_experimental_option (that is a
     # Chrome-only API); preferences are applied one by one instead.
     # NOTE(review): assumes self._prefs is a dict of Firefox preference
     # names to values (or None) - confirm against where it is assigned.
     for pref_name, pref_value in (self._prefs or {}).items():
         options.set_preference(pref_name, pref_value)
     return options