def _launch_webdriver(factory, label, **kwargs):
    """Instantiate a webdriver; log and mail any start-up failure, then re-raise.

    :param factory: webdriver class to call (``Firefox`` or ``Chrome``).
    :param label: browser description interpolated into the alert mail title.
    :param kwargs: keyword arguments forwarded unchanged to ``factory``.
    :return: the webdriver instance produced by ``factory``.
    :raises Exception: whatever ``factory`` raised, after it has been reported.
    """
    try:
        return factory(**kwargs)
    except Exception as e:
        # Log first so the failure is recorded even if the mail cannot be sent.
        logger.exception(e)
        title = "{} {}浏览器打开失败".format(datetime.datetime.now(), label)
        content = "错误原因是:{}".format(e)
        send_mail(title=title, content=content)  # project-defined mail helper
        # Bare raise keeps the original traceback intact.
        raise


def get_browser(headless: bool = True, browser_class: int = 1, init_args: dict = None) -> Firefox:
    """
    Create and return a Selenium browser instance.

    Setup notes:

    * Install selenium with ``pip3 install selenium``.
    * Firefox headless: a headless browser follows the operating system's
      language, so the accepted language is pinned through the profile to
      make sure crawled pages come back in the expected language. The regular
      browser must be installed before its headless mode can be used.
      Download geckodriver from
      https://github.com/mozilla/geckodriver/releases, unpack the
      ``geckodriver`` binary, copy it to ``/usr/local/bin`` and make it
      executable: ``sudo chmod +x /usr/local/bin/geckodriver``.
    * Chrome headless: download chromedriver, e.g. from
      https://chromedriver.storage.googleapis.com/index.html?path=2.35/ ,
      unpack the executable and place it in the Chrome directory (usually
      ``/opt/google/chrome/`` on Ubuntu). Reportedly headless Chrome fails
      when run as root, while headless Firefox does not.

    NOTE(review): ``firefox_profile``, ``executable_path``,
    ``firefox_options`` and ``chrome_options`` are legacy Selenium keyword
    names; confirm the pinned Selenium version before upgrading.

    :param headless: start the browser without a visible window when True.
    :param browser_class: 1 selects Firefox; any other value selects Chrome
        (0 by convention). Chrome is not meant to be used on the server side.
    :param init_args: optional mapping of extra Firefox profile preferences;
        applied only for Firefox and ignored unless it is a ``dict``.
    :return: the started browser. Despite the annotation, this is a
        ``Chrome`` instance when ``browser_class`` is not 1.
    :raises Exception: re-raises whatever the driver constructor raised,
        after the failure has been logged and mailed.
    """
    if browser_class == 1:
        profile = FirefoxProfile()
        profile.set_preference("intl.accept_languages", "zh-cn")
        if isinstance(init_args, dict):
            for pref_name, pref_value in init_args.items():
                profile.set_preference(pref_name, pref_value)

        launch_kwargs = {
            "firefox_profile": profile,
            "executable_path": firefox_driver,
        }
        if headless:
            # Options are only handed to the driver in headless mode.
            options = FirefoxOptions()
            options.add_argument("--headless")
            launch_kwargs["firefox_options"] = options

        label = "Firefox headless" if headless else "Firefox"
        return _launch_webdriver(Firefox, label, **launch_kwargs)

    options = ChromeOptions()
    options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])
    if headless:
        options.add_argument("--headless")

    label = "Chrome headless" if headless else "Chrome"
    return _launch_webdriver(
        Chrome,
        label,
        executable_path=chrome_driver,
        chrome_options=options,
    )
def generate_download_folder_capability_by_firefox(self):
    """Build Firefox options carrying the preferences held in ``self._prefs``.

    Selenium's Firefox options object does not provide
    ``add_experimental_option`` (that method belongs to the Chromium
    options), so each preference is applied with ``set_preference`` instead.

    NOTE(review): assumes ``self._prefs`` is a mapping of Firefox preference
    names (e.g. ``browser.download.dir``) to values -- confirm against the
    enclosing class, which is not visible here.

    :return: a ``FirefoxOptions`` instance with every preference applied.
    """
    options = FirefoxOptions()
    for pref_name, pref_value in self._prefs.items():
        options.set_preference(pref_name, pref_value)
    return options