def init_browser():
    """Create the Selenium driver and store it in the module-level ``browser``.

    The module-level ``config`` mapping selects the behaviour:

    * ``chrome_type``: ``"msedge"`` starts Edge (Chromium mode),
      ``"chromium"`` starts Chromium, anything else (or a missing key)
      starts Chrome.
    * ``headless``: truthy adds ``--headless``.
    * ``mute_audio``: truthy adds ``--mute-audio``.

    After start-up the window is enlarged to at least 1200x850 so the
    meetings menu is visible.
    """
    global browser

    chrome_type = config.get('chrome_type')
    use_edge = chrome_type == "msedge"

    if use_edge:
        chrome_options = EdgeOptions()
        chrome_options.use_chromium = True
    else:
        chrome_options = webdriver.ChromeOptions()

    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--use-fake-ui-for-media-stream')
    chrome_options.add_experimental_option(
        'prefs',
        {
            'credentials_enable_service': False,
            'profile.default_content_setting_values.media_stream_mic': 1,
            'profile.default_content_setting_values.media_stream_camera': 1,
            'profile.default_content_setting_values.geolocation': 1,
            'profile.default_content_setting_values.notifications': 1,
            'profile': {
                'password_manager_enabled': False
            }
        })
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-automation'])

    if config.get('headless'):
        chrome_options.add_argument('--headless')
        print("Enabled headless mode")

    if config.get('mute_audio'):
        chrome_options.add_argument("--mute-audio")

    if chrome_type == "chromium":
        browser = webdriver.Chrome(
            ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
            options=chrome_options)
    elif use_edge:
        browser = Edge(EdgeChromiumDriverManager().install(),
                       options=chrome_options)
    else:
        # Unknown or missing chrome_type falls back to regular Chrome.
        browser = webdriver.Chrome(ChromeDriverManager().install(),
                                   options=chrome_options)

    # Make the window a minimum size to show the meetings menu.  Both
    # dimensions are computed first and applied in a single call; resizing
    # the height with the stale original width would undo a width resize.
    window_size = browser.get_window_size()
    width, height = window_size['width'], window_size['height']
    if width < 1200:
        print("Resized window width")
        width = 1200
    if height < 850:
        print("Resized window height")
        height = 850
    if (width, height) != (window_size['width'], window_size['height']):
        browser.set_window_size(width, height)
def SetUpEdge(self):
    """Start a headless Chromium-mode Edge handler, or return None on failure.

    Notes kept from the original author:

    * Product name: Microsoft WebDriver, product version 83.0.478.58.
    * Edge gets wordy when it is headless, but it works once the window
      size is set.
    * At the time of the Selenium 4 refactor Edge did not yet support the
      new API, so the legacy one is used here.

    Returns:
        The Edge handler, or ``None`` when a ``WebDriverException`` is
        raised while launching or resizing it.
    """
    edge_opts = EdgeOptions()
    edge_opts.use_chromium = True
    # EdgeOptions.AddArguments("headless") is not available in this version
    # of selenium, so headless mode is switched on through the attribute.
    edge_opts.headless = True

    driver_location = Path(self.handler_path + 'msedgedriver.exe')
    try:
        edge_handler = Edge(executable_path=driver_location,
                            options=edge_opts)
        # Headless only works with sendkeys once the handler window has an
        # explicit size.
        edge_handler.set_window_size(1600, 1200)
        logging.info(
            f"{datetime.now(tz=None)} Info {self.browse} browser handler found"
        )
    except WebDriverException:
        logging.info(
            f"{datetime.now(tz=None)} Warning {self.browse} browser handler not found or failed to launch."
        )
        return None
    return edge_handler
    # NOTE(review): trailing remark kept from the original, target unclear:
    # ignore the handshake errors
def get_browser(_config):
    """
    Build and return the Selenium browser object described by ``_config``.

    Values read from ``_config``:

    * ``_config['selenium']['browserType']``: ``'Chrome'``, ``'Edge'`` or
      ``'Firefox'``.
    * ``_config['selenium']['headless']``: truthy to run headless.
    * ``_config['selenium']['binary']``: browser executable path, or ``""``
      to let the driver locate the browser.
    * ``_config['user-agent'][0]``: user agent string (applied to Chrome
      only).

    Driver executables are looked up through ``get_file`` under
    ``./drivers/`` (``get_file`` is presumably a project path resolver;
    confirm against its definition).

    :return: the started driver with a 500x700 window, or ``None`` after
        printing an error when a ``WebDriverException`` occurs (driver
        problem, unknown browser type or unsupported platform).
    """
    selenium_cfg = _config['selenium']
    browser_type = selenium_cfg['browserType']
    headless = selenium_cfg['headless']
    binary = selenium_cfg['binary']
    user_agent = _config['user-agent'][0]

    try:
        # Only these platforms were handled; anything else used to leave the
        # driver variable unbound and crash with UnboundLocalError.
        if sys.platform not in ('linux', 'darwin', 'win32'):
            raise WebDriverException(
                f"unsupported platform: {sys.platform}")

        if browser_type == 'Chrome':
            chrome_options = webdriver.ChromeOptions()
            # Avoid start-up errors in some environments.
            chrome_options.add_argument('--no-sandbox')
            chrome_options.add_argument('--disable-dev-shm-usage')
            chrome_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            chrome_options.add_argument(f'user-agent={user_agent}')
            if binary != "":
                # The path must be configured when the browser cannot be
                # found automatically.
                chrome_options.binary_location = binary
            if headless:
                chrome_options.add_argument('--headless')
                chrome_options.add_argument('--disable-gpu')
            _browser = webdriver.Chrome(
                executable_path=get_file("./drivers/chromedriver"),
                desired_capabilities={},
                options=chrome_options)
        elif browser_type == 'Edge':
            from msedge.selenium_tools import Edge, EdgeOptions
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            edge_options.add_argument('--no-sandbox')
            edge_options.add_argument('--disable-dev-shm-usage')
            edge_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            if binary != "":
                edge_options.binary_location = binary
            if headless:
                edge_options.add_argument('--headless')
                edge_options.add_argument('--disable-gpu')
            _browser = Edge(
                executable_path=get_file("./drivers/msedgedriver"),
                capabilities={},
                options=edge_options)
        elif browser_type == 'Firefox':
            # Clear the log left over from the previous run.
            os.makedirs(get_file("./logs/"), exist_ok=True)
            log_path = get_file("./logs/geckodriver.log")
            with open(log_path, "w"):
                pass
            firefox_options = webdriver.FirefoxOptions()
            firefox_options.log.level = "fatal"
            if binary != "":
                firefox_options.binary_location = binary
            if headless:
                firefox_options.add_argument('--headless')
                firefox_options.add_argument('--disable-gpu')
            firefox_kwargs = {
                'executable_path': get_file('./drivers/geckodriver'),
                'options': firefox_options,
            }
            if sys.platform == 'linux':
                # Only the Linux start-up redirects the service log.
                firefox_kwargs['service_log_path'] = log_path
            _browser = webdriver.Firefox(**firefox_kwargs)
        else:
            raise WebDriverException(
                f"unsupported browser type: {browser_type}")

        _browser.set_window_size(500, 700)
        return _browser
    except WebDriverException as exc:
        # Driver problem: report it (with the underlying reason) and let the
        # caller see None.
        print("ERROR", "浏览器错误", "请检查你下载并解压好的驱动是否放在drivers目录下",
              str(exc).strip())
        return None
class AbstractDriver():
    """Locate, download and start a Selenium driver for Edge or Chrome.

    Status and progress messages are published as server-sent events
    through ``MessageAnnouncer`` objects: ``pathStatusStream`` and
    ``driverStatusStream`` are created per instance, ``announcer`` is
    supplied by the caller and receives the download progress.
    """

    driver = None
    browser = None
    driverPath = None
    # Folder that is searched for, and receives, the driver executables.
    driverFolder = Path.cwd() / "driver"
    driverInstalledBool = False
    headless = False
    userAgent = None
    announcer = None
    pathStatusStream = None
    driverStatusStream = None

    def __init__(self, announcer):
        """Create the two status streams and remember ``announcer``."""
        self.pathStatusStream = messageAnnouncer.MessageAnnouncer()
        self.driverStatusStream = messageAnnouncer.MessageAnnouncer()
        self.announcer = announcer

    def getDriverPathStatus(self):
        """Publish the driver-path status from a background thread."""
        x = threading.Thread(
            target=self._getDriverPathStatus
        )
        x.start()

    def _getDriverPathStatus(self):
        """Announce on ``pathStatusStream`` whether a driver is installed."""
        respDict = {
            "status": 0,
            "eventSourceUrl": "/admin/stream/getDriverPathStatus",
            "title": "Driver name",
            "headerBadge": {
                "caption": "",
                "content": "",
            },
            "action": [
                {
                    "name": "Download driver",
                    "actionUrl": "http://localhost:5000/admin/driver/"
                                 "Edg?headless=false",
                    "enabled": True
                }
            ],
            "bodyBadge": {
                "caption": "",
                "content": "",
            },
        }
        errorIcon = '<i class="material-icons">error</i>'

        # Search the driver folder only when no path is known yet.
        if self.driverPath is None:
            self.getDriverPath(self.driverFolder, None)

        if self.driverPath is None:
            respDict["status"] = 1
            respDict["headerBadge"]["caption"] = "Driver not installed"
            respDict["headerBadge"]["content"] = errorIcon
            respDict["bodyBadge"]["caption"] = "Driver not installed"
            respDict["bodyBadge"]["content"] = errorIcon
        else:
            respDict["status"] = 0
            respDict["headerBadge"]["caption"] = "driver"
            respDict["headerBadge"]["content"] = str(self.driverPath.stem)
            respDict["action"][0]["enabled"] = False

        msgText = json.dumps(
            respDict,
            default=str
        )
        self.pathStatusStream.announce(
            self.pathStatusStream.format_sse(msgText)
        )

    def getDriverStatus(self):
        """Publish the driver run status from a background thread."""
        x = threading.Thread(
            target=self._getDriverStatus
        )
        x.start()

    def _getDriverStatus(self):
        """Announce on ``driverStatusStream`` whether the driver is running."""
        respDict = {
            "status": 0,
            "eventSourceUrl": "/admin/stream/getDriverStatus",
            "title": "Driver status",
            "headerBadge": {
                "caption": "",
                "content": "",
            },
            "action": [
                {
                    "name": "Start driver",
                    "actionUrl": "http://localhost:5000/admin/startDriver",
                    "enabled": True
                }
            ],
            "bodyBadge": {
                "caption": "",
                "content": "",
            },
        }
        errorIcon = '<i class="material-icons">error</i>'
        try:
            # AttributeError: driver is still None.  IndexError: the session
            # reports no window.  Both mean there is nothing usable.
            firstHandle = self.driver.window_handles[0]
        except (WebDriverException, AttributeError, IndexError) as e:
            respDict["status"] = 1
            respDict["headerBadge"]["caption"] = "Not started"
            respDict["headerBadge"]["content"] = errorIcon
            respDict["bodyBadge"]["caption"] = str(e)
            respDict["bodyBadge"]["content"] = errorIcon
        else:
            respDict["status"] = 0
            respDict["headerBadge"]["caption"] = "instance"
            respDict["headerBadge"]["content"] = str(firstHandle)
            respDict["action"][0]["enabled"] = False

        msgText = json.dumps(
            respDict,
            default=str
        )
        self.driverStatusStream.announce(
            self.driverStatusStream.format_sse(msgText)
        )

    def checkDriver(self):
        """Make sure a driver session exists, starting one if needed.

        With no driver yet, the first executable found in ``driverFolder``
        decides the browser.  With an existing driver, a status command is
        sent and the driver is recreated on ``MaxRetryError``.
        """
        if self.driver is None:
            print("Driver not started")
            print("Starting programatically")
            print("Assuming you installed only required drivers")
            _, self.driverPath = self.getDriverPath(self.driverFolder, None)
            if self.driverPath is None:
                # Nothing to start; previously this crashed on None.name.
                print("Driver not installed")
                return
            if self.driverPath.name == "msedgedriver.exe":
                self.browser = "Edg"
            elif self.driverPath.name == "chromedriver.exe":
                self.browser = "Chrome"
            else:
                print("Browser not supported yet")
                return
            # make browser headless or not
            self.createDriver(self.browser, self.driverPath, False)
        else:
            try:
                self.driver.execute(Command.STATUS)
            except MaxRetryError:
                self.createDriver(self.browser, self.driverPath, False)

    def getDriver(self):
        """Return the driver after ``checkDriver``; may be ``None``."""
        self.checkDriver()
        return self.driver

    def getDriverPath(self, driverFolder, browser=None):
        """
        Check if driver is installed and returns path

        The result is also stored on ``self.driverInstalledBool`` and
        ``self.driverPath``.  Both are reset on every call so a driver found
        for another browser earlier is never reported as a match.

        Args:
            driverFolder (Path): Pathlib path to driver folder
            browser (string, optional): Browser type; matched
                case-insensitively against the executable name.
                Defaults to None, which accepts any ``.exe``.

        Returns:
            driverInstalledBool (bool): True if driver was found
            driverPath (Path): Driver + driver name path, None if not found
        """
        wanted = browser.lower() if browser is not None else None
        foundPath = None
        for candidate in driverFolder.glob('**/*.exe'):
            # As before, the last matching executable wins.
            if wanted is None or wanted in candidate.name.lower():
                foundPath = candidate

        self.driverPath = foundPath
        self.driverInstalledBool = foundPath is not None
        return self.driverInstalledBool, self.driverPath

    def downloadDriver(self, browser, headlessStr, userAgent):
        """Store the request and run ``_downloadDriver`` in a thread.

        Args:
            browser (str): Browser token, "Chrome" or "Edg"
            headlessStr (str): "true" (any case) enables headless mode
            userAgent (str): User agent used to find the browser version
        """
        self.browser = browser
        self.headless = headlessStr.lower() == "true"
        self.userAgent = userAgent
        x = threading.Thread(
            target=self._downloadDriver
        )
        x.start()

    def _downloadDriver(self):
        """
        Creates selenium driver for webscrapping automation
        Downloads it into driver folder if not installed

        Progress and errors are announced through ``self.announcer``.
        """
        # Create the folder that is actually searched and extracted into,
        # not a "driver" folder relative to the current working directory.
        self.driverFolder.mkdir(parents=True, exist_ok=True)

        msgTxt = "User agent: " + self.userAgent + "<br>"
        self.announcer.announce(self.announcer.format_sse(msgTxt))

        # Find the "<browser>/<version>" token in the user agent.
        version = ""
        for browserVersion in self.userAgent.split(" "):
            parts = browserVersion.split("/")
            if len(parts) > 1 and parts[0] == self.browser:
                version = parts[1]

        if not version:
            # Other tokens seen in user agents: Mozilla, AppleWebKit,
            # Chrome, Safari, Edg
            msgTxt = "Error: Browser not found, options are - Chrome, Edg <br>"
            self.announcer.announce(self.announcer.format_sse(msgTxt))

        # get driver path
        self.driverInstalledBool, self.driverPath = self.getDriverPath(
            self.driverFolder,
            self.browser
        )

        # download driver
        if not self.driverInstalledBool:
            if not version:
                # Without a version there is nothing to download.
                return

            msgTxt = "Installing driver <br>"
            self.announcer.announce(self.announcer.format_sse(msgTxt))

            if self.browser == "Chrome":
                browserDriverDownloadPage, _, _ = download.getRequest(
                    "https://chromedriver.chromium.org/downloads"
                )
                pattern = r"ChromeDriver (" \
                    + re.escape(version.split(".")[0]) \
                    + r"\.\d*\.\d*\.\d*)"
                matches = re.findall(
                    pattern,
                    browserDriverDownloadPage.content.decode("utf-8")
                )
                if not matches:
                    msgTxt = "Error: No ChromeDriver release found for " \
                        + version + " <br>"
                    self.announcer.announce(
                        self.announcer.format_sse(msgTxt))
                    return
                existingDriverVersion = matches[0]
                browserDriverDownloadUrl = \
                    "https://chromedriver.storage.googleapis.com/" \
                    + existingDriverVersion \
                    + "/chromedriver_win32.zip"
            elif self.browser == "Edg":
                browserDriverDownloadUrl = \
                    "https://msedgedriver.azureedge.net/" \
                    + version \
                    + "/edgedriver_win64.zip"
            else:
                print("Browser not supported yet")
                msgTxt = "Error: Browser not supported yet <br>"
                self.announcer.announce(self.announcer.format_sse(msgTxt))
                return

            msgTxt = "Driver URL: " + browserDriverDownloadUrl + "<br>"
            self.announcer.announce(self.announcer.format_sse(msgTxt))

            driverRequest = download.getRequest(browserDriverDownloadUrl)[0]
            with zipfile.ZipFile(io.BytesIO(driverRequest.content)) as driverZip:
                driverZip.extractall(self.driverFolder)

            msgTxt = "Downloaded and extracted driver <br>"
            self.announcer.announce(self.announcer.format_sse(msgTxt))

            # get driver path
            self.driverInstalledBool, self.driverPath = self.getDriverPath(
                self.driverFolder,
                self.browser
            )
            if not self.driverInstalledBool:
                msgTxt = "Error: Driver not found after download <br>"
                self.announcer.announce(self.announcer.format_sse(msgTxt))
                return
        else:
            msgTxt = "Driver already satisfied <br>"
            self.announcer.announce(self.announcer.format_sse(msgTxt))

        # Create driver
        self.driver = self.createDriver(
            self.browser,
            self.driverPath,
            self.headless
        )

        msgTxt = "Started Driver <br>"
        self.announcer.announce(self.announcer.format_sse(msgTxt))

    def createDriver(self, browser, driverPath, headless=None):
        """
        Start selenium web driver

        Args:
            browser (str): Browser type, "Edg" or "Chrome"
            driverPath (Path): Path to driver
            headless (bool): Headless bool

        Returns:
            driver: selenium driver; for an unsupported browser the current
                ``self.driver`` is returned untouched (possibly None)
        """
        self.headless = headless

        if browser == "Edg":
            edge_options = EdgeOptions()
            # msedgedriver drives Chromium Edge, so Chromium mode is needed
            # whether or not the session is headless.
            edge_options.use_chromium = True
            if self.headless:
                # make Edge headless
                edge_options.add_argument("headless")
                edge_options.add_argument("disable-gpu")
            edge_options.add_argument("--log-level=3")
            edge_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging']
            )
            self.driver = Edge(
                executable_path=str(driverPath),
                options=edge_options
            )
        elif browser == "Chrome":
            chrome_options = Options()
            if self.headless:
                chrome_options.add_argument("--headless")
            chrome_options.add_argument("--log-level=3")
            chrome_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging']
            )
            self.driver = webdriver.Chrome(
                executable_path=str(driverPath),
                options=chrome_options
            )
        else:
            # No driver was created, so there is nothing to configure.
            print("Browser not supported yet")
            return self.driver

        self.driver.set_window_size(1800, 1080)
        self.driver.set_page_load_timeout(100000)

        return self.driver
def get_browser(_config_, path_prefix=""):
    """
    Build and return the Selenium browser object described by ``_config_``.

    Values read from ``_config_``:

    * ``_config_['selenium']['browserType']``: ``'Chrome'``, ``'Edge'`` or
      ``'Firefox'``.
    * ``_config_['selenium']['headless']``: truthy to run headless.
    * ``_config_['selenium']['binary']``: browser executable path, or ``""``
      to let the driver locate the browser.
    * ``_config_['user-agent'][0]``: user agent string (applied to Chrome
      only).

    :param path_prefix: string prepended to the ``./drivers/...`` path
        before it is handed to ``get_file`` (``get_file`` is presumably a
        project path resolver; confirm against its definition).
    :return: the started driver with a 500x700 window.  On any
        ``WebDriverException`` (driver problem, unknown browser type,
        unsupported platform) an error is printed and the process exits
        with status 1.
    """
    selenium_cfg = _config_['selenium']
    browser_type = selenium_cfg['browserType']
    headless = selenium_cfg['headless']
    binary = selenium_cfg['binary']
    user_agent = _config_['user-agent'][0]
    _browser_ = None

    try:
        # Only these platforms were handled; anything else used to leave
        # the driver as None and crash with an uncaught AttributeError.
        if sys.platform not in ('linux', 'darwin', 'win32'):
            raise WebDriverException(
                f"unsupported platform: {sys.platform}")

        if browser_type == 'Chrome':
            chrome_options = webdriver.ChromeOptions()
            # Avoid start-up errors in some environments.
            chrome_options.add_argument('--no-sandbox')
            chrome_options.add_argument('--disable-dev-shm-usage')
            chrome_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            chrome_options.add_argument(f'user-agent={user_agent}')
            if binary != "":
                # The path must be configured when the browser cannot be
                # found automatically.
                chrome_options.binary_location = binary
            if headless:
                chrome_options.add_argument('--headless')
                chrome_options.add_argument('--disable-gpu')
            _browser_ = webdriver.Chrome(
                executable_path=get_file(path_prefix +
                                         "./drivers/chromedriver"),
                desired_capabilities={},
                options=chrome_options)
        elif browser_type == 'Edge':
            from msedge.selenium_tools import Edge, EdgeOptions
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            edge_options.add_argument('--no-sandbox')
            edge_options.add_argument('--disable-dev-shm-usage')
            edge_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            if binary != "":
                edge_options.binary_location = binary
            if headless:
                edge_options.add_argument('--headless')
                edge_options.add_argument('--disable-gpu')
            _browser_ = Edge(
                executable_path=get_file(path_prefix +
                                         "./drivers/msedgedriver"),
                capabilities={},
                options=edge_options)
        elif browser_type == 'Firefox':
            # Clear the log left over from the previous run.
            os.makedirs(get_file("./logs/"), exist_ok=True)
            log_path = get_file("./logs/geckodriver.log")
            with open(log_path, "w"):
                pass
            firefox_options = webdriver.FirefoxOptions()
            firefox_options.log.level = "fatal"
            if binary != "":
                firefox_options.binary_location = binary
            if headless:
                firefox_options.add_argument('--headless')
                firefox_options.add_argument('--disable-gpu')
            firefox_kwargs = {
                # path_prefix is honoured here as for Chrome and Edge.
                'executable_path': get_file(path_prefix +
                                            './drivers/geckodriver'),
                'options': firefox_options,
            }
            if sys.platform == 'linux':
                # Only the Linux start-up redirects the service log.
                firefox_kwargs['service_log_path'] = log_path
            _browser_ = webdriver.Firefox(**firefox_kwargs)
        else:
            raise WebDriverException(
                f"unsupported browser type: {browser_type}")

        _browser_.set_window_size(500, 700)
        return _browser_
    except WebDriverException as e:
        # Driver problem: print the most specific hint available, then exit.
        details = str(e.args)
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if "This version of ChromeDriver only supports Chrome version" in details:
            print("\r[%s] [ERROR] 浏览器错误(chromedriver版本错误),请比对前三位版本号" %
                  timestamp)
        elif "'chromedriver' executable needs to be in PATH" in details:
            print("\r[%s] [ERROR] 浏览器错误,请检查你下载并解压好的驱动是否放在drivers目录下" %
                  timestamp)
        elif "unknown error: cannot find Chrome binary" in details:
            print(
                "\r[%s] [ERROR] 浏览器错误(Chrome浏览器可执行文件路径未成功识别),请在配置文件中修改selenium.binary为浏览器可执行文件绝对路径"
                % timestamp)
        else:
            print(
                "\r[%s] [ERROR] 浏览器错误, 请检查你下载并解压好的驱动是否放在drivers目录下,如需帮助请及时反馈; err: %s"
                % (timestamp, details))
        sys.exit(1)