def carregando_url(webdriver, url, timeout=30):
    """Load *url* in the browser and report the HTTP status of the same URL.

    The page is opened through *webdriver*; the status code comes from a
    separate ``requests.get`` call, because the driver calls used here do not
    provide one.

    Args:
        webdriver: Driver instance used to open the page.
        url: Address to load.
        timeout: Seconds allowed for the browser page load and for the HTTP
            request. Defaults to 30, the value that used to be hard-coded.

    Returns:
        The HTTP status code returned by ``requests.get``.
    """
    webdriver.set_page_load_timeout(timeout)
    # Was ``webdriver.set_page.get(url)``, which raises AttributeError.
    webdriver.get(url)
    resposta = requests.get(url, timeout=timeout)
    print(resposta.status_code)
    # Return the code so callers can check it (see ``instanciar_driver``).
    return resposta.status_code


def instanciar_driver():
    """Load the Google Brazil home page and assert that it answers with 200.

    NOTE(review): ``webdriver`` is taken from module scope here; confirm it is
    a driver instance and not the ``selenium.webdriver`` package.

    Raises:
        AssertionError: If the status code is not 200.
    """
    teste = carregando_url(webdriver, "https://www.google.com.br")
    assert teste == 200, "Erro ao carregar a página"
def get_browserstack_webdriver(capabilities):
    """Open a remote WebDriver session on the BrowserStack hub.

    Default values for ``resolution``, ``browserstack.local`` and
    ``browserstack.debug`` are filled into *capabilities* in place (keys that
    are already present are left untouched). The hub credentials are read from
    ``~/.browserstack.json``.

    Args:
        capabilities: Dict of desired capabilities for the session.

    Returns:
        The ``selenium.webdriver.Remote`` instance, configured with a page-load
        timeout of 60 and an implicit wait of 10.

    Raises:
        OSError: If the credentials file cannot be opened.
        KeyError: If the credentials file has no ``user`` or ``key`` entry.
    """
    capabilities.setdefault("resolution", "1920x1080")
    capabilities.setdefault("browserstack.local", True)
    capabilities.setdefault("browserstack.debug", True)

    config = os.path.expanduser("~/.browserstack.json")
    # Close the credentials file deterministically instead of leaking the
    # handle returned by a bare ``open``.
    with open(config) as config_file:
        cfg = json.load(config_file)

    hub = "http://{user}:{key}@hub.browserstack.com/wd/hub"
    hub = hub.format(**cfg)

    # Named ``driver`` so the local does not hide the ``webdriver`` package name.
    driver = selenium.webdriver.Remote(command_executor=hub,
                                       desired_capabilities=capabilities)
    driver.set_page_load_timeout(60)
    driver.implicitly_wait(10)
    return driver
def get_browserstack_webdriver(capabilities):
    """Open a remote WebDriver session on the BrowserStack hub.

    Default values for ``resolution``, ``browserstack.local`` and
    ``browserstack.debug`` are filled into *capabilities* in place (keys that
    are already present are left untouched). The hub credentials are read from
    ``~/.browserstack.json``.

    Args:
        capabilities: Dict of desired capabilities for the session.

    Returns:
        The ``selenium.webdriver.Remote`` instance, configured with a page-load
        timeout of 60 and an implicit wait of 10.

    Raises:
        OSError: If the credentials file cannot be opened.
        KeyError: If the credentials file has no ``user`` or ``key`` entry.
    """
    capabilities.setdefault('resolution', '1920x1080')
    capabilities.setdefault('browserstack.local', True)
    capabilities.setdefault('browserstack.debug', True)

    config = os.path.expanduser('~/.browserstack.json')
    # Close the credentials file deterministically instead of leaking the
    # handle returned by a bare ``open``.
    with open(config) as config_file:
        cfg = json.load(config_file)

    hub = 'http://{user}:{key}@hub.browserstack.com/wd/hub'
    hub = hub.format(**cfg)

    # Named ``driver`` so the local does not hide the ``webdriver`` package name.
    driver = selenium.webdriver.Remote(command_executor=hub,
                                       desired_capabilities=capabilities)
    driver.set_page_load_timeout(60)
    driver.implicitly_wait(10)
    return driver
def get_imgsrc_by_render(url, webdriver):
    """Render *url* in the browser and return the ``src`` of the ``TheImg`` image.

    The page is loaded with a 60 second page-load timeout. On timeout the
    pending load is stopped and whatever has been rendered so far is parsed.
    The first ``<img>`` whose ``name`` attribute equals ``TheImg`` wins.

    Args:
        url: Address of the page to render.
        webdriver: Driver instance used to load the page.

    Returns:
        The ``src`` attribute of the matching image, or ``None`` when no such
        image exists. In that case the page source is written to ``file_path``.

    NOTE(review): ``file_path`` is not defined in this function; it is assumed
    to be a module-level name. Confirm.
    """
    webdriver.set_page_load_timeout(60)
    try:
        webdriver.get(url)
    except TimeoutException:
        # Keep the partially rendered page and abort the pending load.
        print("timeout")
        webdriver.execute_script('window.stop()')

    # Parsing used to sit in a ``finally`` block. The ``return`` there silently
    # discarded any non-timeout error raised by ``get``; such errors now
    # propagate to the caller.
    page_source = webdriver.page_source
    soup_html = BeautifulSoup(page_source, 'lxml')
    for img_tag in soup_html.find_all('img'):
        if img_tag.get('name') == 'TheImg':
            print(img_tag['src'])
            return img_tag['src']

    # No matching image: dump the page for later inspection. The explicit
    # encoding avoids UnicodeEncodeError where the platform default is narrower.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(page_source)
    return None
def init_webdriver(webdriver):
    """Create a Firefox driver from a profile with a fixed set of preferences.

    Args:
        webdriver: Object providing ``FirefoxProfile`` and ``Firefox``
            (presumably the ``selenium.webdriver`` package; confirm with
            callers).

    Returns:
        The Firefox driver, with its page-load timeout set to 300.
    """
    # Preferences applied to the profile, in this order.
    # NOTE(review): these presumably switch off stylesheets, images, scripts
    # and sub-documents to speed up loading; confirm against Firefox's
    # preference documentation.
    preferences = (
        ("permissions.default.stylesheet", 2),
        ("permissions.default.image", 2),
        ("permissions.default.script", 2),
        ("permissions.default.subdocument", 2),
        ("javascript.enabled", False),
    )

    profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()

    # Driver construction happens while holding ``my_lock``.
    with my_lock:
        driver = webdriver.Firefox(firefox_profile=profile)

    driver.set_page_load_timeout(300)
    return driver
def navegar_a_sitio(webdriver, url_a_navegar, result_list):
    """Navigate to *url_a_navegar* and record the outcome in *result_list*.

    A ``Result`` is created, its execution timer is started, and the page is
    loaded with a page-load timeout of 100. Success, a timeout, or any other
    ``WebDriverException`` is written to the result's ``mensaje_error`` and
    ``validacion_correcta`` fields and logged. The result is then stored in
    ``result_list.result_validacion_ingreso_url``.

    Args:
        webdriver: Driver instance used to load the page.
        url_a_navegar: Address to open.
        result_list: Object that receives the result.

    Returns:
        The same *result_list* object, updated.
    """
    # Single source of truth for the timeout. The error message used to say
    # "60 segundos" while the driver was configured with 100.
    tiempo_limite = 100

    resultado = Result()
    resultado.tiempo_inicio_de_ejecucion = 0
    resultado.inicializar_tiempo_de_ejecucion()

    SeleniumTesting.log.info(
        'ingresando a la siguiente url: "{}"'.format(url_a_navegar))

    try:
        webdriver.set_page_load_timeout(tiempo_limite)
        webdriver.get(url_a_navegar)
    except TimeoutException as e:
        resultado.mensaje_error = (
            'Han transcurrido mas de {} segundos sin poder acceder a la '
            'pagina principal de la plataforma "{}": {}'
        ).format(tiempo_limite, url_a_navegar,
                 SeleniumTesting.formatear_excepcion(e))
        resultado.validacion_correcta = False
        SeleniumTesting.log.error(resultado.mensaje_error)
    except WebDriverException as e:
        resultado.mensaje_error = (
            'No fue posible ingresar a la plataforma de Exchange OWA, favor de verificar'
            ' si se tiene conectividad por internet, error detallado : {}'
        ).format(SeleniumTesting.formatear_excepcion(e))
        resultado.validacion_correcta = False
        SeleniumTesting.log.error(resultado.mensaje_error)
    else:
        # ``mensaje_error`` also carries the success message.
        resultado.mensaje_error = (
            'Accediendo a la pagina principal de la plataforma Exchange'
            ' OWA con exito, url actual: "{}"'
        ).format(url_a_navegar)
        resultado.validacion_correcta = True
        SeleniumTesting.log.info(resultado.mensaje_error)

    resultado.finalizar_tiempo_de_ejecucion()
    resultado.establecer_tiempo_de_ejecucion()
    result_list.result_validacion_ingreso_url = resultado
    return result_list
def get_driver(chrome=r"C:/Program Files (x86)/Google/Chrome/Application/chrome.exe",
               driver=None, remote_port=9223, proxy_port=None):
    """Start Chrome with remote debugging enabled and attach a Selenium driver.

    Steps, in order:

    1. Resolve the ``chrome`` and ``chromedriver`` executables (explicit
       argument first, then ``PATH``).
    2. Delete the ``auto_selenium`` directory under the user's home and launch
       Chrome with a fresh ``AutomationProfile`` user-data directory there.
    3. Rewrite the ``$cdc_...`` marker inside the chromedriver binary on disk.
    4. Attach a Chrome driver to the running browser via ``debuggerAddress``.
    5. Replace the driver's ``get`` with a version that retries on timeout.
    6. Start a thread that quits the driver once the driver log reports that
       DevTools is disconnected.

    Args:
        chrome: Path to the Chrome executable. A falsy value means "look for
            ``chrome`` on PATH".
        driver: Path to the chromedriver executable. A falsy value means "look
            for ``chromedriver`` on PATH".
        remote_port: Remote debugging port Chrome is started with.
        proxy_port: When truthy, Chrome is started with
            ``--proxy-server=http://127.0.0.1:<proxy_port>`` (handy for tools
            such as mitmdump).

    Returns:
        The attached Chrome driver, with a page-load timeout of 5 and the
        retrying ``get``.

    Raises:
        AssertionError: If either executable cannot be located.
    """
    import os
    import shutil
    import subprocess
    import threading
    import time

    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException

    # Explicit locations win; otherwise search PATH for the absolute path.
    chrome_path = chrome if chrome else shutil.which('chrome')
    driver_path = driver if driver else shutil.which('chromedriver')
    assert chrome_path, "pls set chrome.exe path in env or set chrome=$abs_path(chrome.exe)."
    assert driver_path, "pls set chromedriver.exe path in env or set driver=$abs_path(chromedriver.exe)."

    # Temporary Chrome profile location, so the everyday Chrome profile is not
    # damaged. Testing also showed that removing the previous temporary profile
    # makes start-up much faster.
    home = os.environ.get('HOME')
    if not home:
        home_drive = os.environ.get('HOMEDRIVE')
        home_dir = os.environ.get('HOMEPATH')
        if home_drive is not None and home_dir is not None:
            home = home_drive + home_dir
        else:
            # Previously a TypeError (None + str) when either variable was unset.
            home = os.path.expanduser('~')
    profile_dir = os.path.join(home, 'auto_selenium', 'AutomationProfile')
    cache_path = os.path.split(profile_dir)[0]
    if os.path.isdir(cache_path):
        print('cache_path clear: {}'.format(cache_path))
        shutil.rmtree(cache_path)

    chrome_exe = '''"{}" --remote-debugging-port={} --user-data-dir="{}"'''.format(
        chrome_path, remote_port, profile_dir)
    if proxy_port:
        # Route the browser through a local proxy.
        chrome_exe += ''' --proxy-server=http://127.0.0.1:{}'''.format(proxy_port)
    subprocess.Popen(chrome_exe)
    print('driver_path: {}'.format(driver_path))
    print('chrome_path: {}'.format(chrome_path))
    print('chrome_exe: {}'.format(chrome_exe))

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option(
        "debuggerAddress", "127.0.0.1:{}".format(remote_port))

    # Deals with fingerprint detection based on the
    # document.$cdc_asdjflasutopfhvcZLmcfl_ property.
    def check_magic_word(driver_path, rollback=False):
        """Swap the marker bytes in the chromedriver binary (or swap them back)."""
        stock = b'$cdc_asdjflasutopfhvcZLmcfl_'
        patched = b'$pqp_nfqwsynfhgbcsuipMYzpsy_'
        mgc_o, mgc_t = (patched, stock) if rollback else (stock, patched)
        with open(driver_path, 'rb') as f:
            filebit = f.read()
        # Only rewrite the file when there is something to replace.
        if mgc_o in filebit:
            with open(driver_path, 'wb') as f:
                f.write(filebit.replace(mgc_o, mgc_t))

    check_magic_word(driver_path, rollback=False)

    # Start the webdriver.
    browser = webdriver.Chrome(chrome_options=chrome_options,
                               executable_path=driver_path)
    # Cap every page load at n seconds so a page cannot load forever.
    browser.set_page_load_timeout(5)

    _bak_get = browser.get

    def get(url):
        """Load *url*, trying again every time the page load times out."""
        # A loop instead of recursion: a page that keeps timing out can no
        # longer end in RecursionError.
        while True:
            try:
                _bak_get(url)
            except TimeoutException:
                print('selenium.common.exceptions.TimeoutException: {}'.format(url))
            else:
                return

    browser.get = get

    # When the browser window is closed directly, shut chromedriver.exe down as
    # well so no process is left behind.
    def hook_close_window():
        """Poll the driver log and quit the driver once DevTools is gone."""
        marker = 'Unable to evaluate script: disconnected: not connected to DevTools'
        while True:
            time.sleep(.3)  # check every 0.3 seconds
            try:
                driver_logs = browser.get_log('driver')
            except Exception:
                # Best effort: an unreadable log simply means "nothing new".
                driver_logs = []
            # ``or ''`` guards against entries without a message (was TypeError).
            if any(marker in (entry.get('message') or '') for entry in driver_logs):
                browser.quit()
                return

    threading.Thread(target=hook_close_window).start()
    return browser
from selenium import webdriver # rule-id: synchronization-with-sleep webdriver.set_page_load_timeout(30) webdriver.set_page_load_timeout(0.3) # rule-id: synchronization-with-sleep webdriver.set_script_timeout(20) webdriver.set_script_timeout(0.2) # rule-id: synchronization-with-sleep await asyncio.sleep(1) import time # rule-id: synchronization-with-sleep time.sleep(10) # rule-id: synchronization-with-sleep time.sleep(0.2) def a(): return 10 # OK t.sleep(a()) # OK t.sleep(some_var)
def carregando_url(webdriver, url, timeout=30):
    """Load *url* in the browser and report the HTTP status of the same URL.

    The page is opened through *webdriver*; the status code comes from a
    separate ``requests.get`` call, because the driver calls used here do not
    provide one.

    Args:
        webdriver: Driver instance used to open the page.
        url: Address to load.
        timeout: Seconds allowed for the browser page load and for the HTTP
            request. Defaults to 30, the value that used to be hard-coded.

    Returns:
        The HTTP status code returned by ``requests.get``.
    """
    webdriver.set_page_load_timeout(timeout)
    webdriver.get(url)
    # Keep ``url`` a string; the response gets its own name.
    resposta = requests.get(url, timeout=timeout)
    print(resposta.status_code)
    # Return the code so callers can check it instead of always getting None.
    return resposta.status_code
def get_driver(chrome=None, driver=None, remote_port=9223, proxy_port=None):
    """Start Chrome with remote debugging enabled and attach a Selenium driver.

    Steps, in order:

    1. Resolve the Chrome executable (explicit argument, then the Windows
       registry, then ``PATH``) and the chromedriver executable (explicit
       argument, then ``PATH``).
    2. Delete the ``auto_selenium`` directory under the user's home and launch
       Chrome with a fresh ``AutomationProfile`` user-data directory there.
    3. Rewrite the ``$cdc_...`` marker inside the chromedriver binary on disk.
    4. Attach a Chrome driver to the running browser via ``debuggerAddress``.
    5. Replace the driver's ``get`` with a version that retries on timeout.
    6. Start a thread that quits the driver once the driver log reports that
       DevTools is disconnected.

    Args:
        chrome: Path to the Chrome executable. A falsy value triggers the
            registry lookup and then the PATH lookup.
        driver: Path to the chromedriver executable. A falsy value means "look
            for ``chromedriver`` on PATH".
        remote_port: Remote debugging port Chrome is started with.
        proxy_port: When truthy, Chrome is started with
            ``--proxy-server=http://127.0.0.1:<proxy_port>`` (handy for tools
            such as mitmdump).

    Returns:
        The attached Chrome driver, with a page-load timeout of 5 and the
        retrying ``get``.

    Raises:
        AssertionError: If either executable cannot be located.
    """
    import os
    import shutil
    import subprocess
    import threading
    import time

    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException

    def get_win_chrome_path():
        """Return the chrome.exe path found in the uninstall registry keys, or None."""
        # Note: only a Chrome installed through its installer (not a portable
        # copy) leaves traces in the registry. That is the usual case, so this
        # works as the general way of locating chrome.exe on Windows.
        import winreg

        sub_key = ['SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall',
                   'SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall']

        def get_install_list(key, root):
            """Yield (display name, install location, version) per readable entry."""
            try:
                # KEY_READ is enough for a lookup. KEY_ALL_ACCESS fails on
                # HKEY_LOCAL_MACHINE without administrator rights, and that
                # failure used to be swallowed silently.
                with winreg.OpenKey(root, key, 0, winreg.KEY_READ) as _key:
                    # Visit every subkey; ``range(0, count - 1)`` skipped the last.
                    for j in range(winreg.QueryInfoKey(_key)[0]):
                        try:
                            each_name = key + '\\' + winreg.EnumKey(_key, j)
                            with winreg.OpenKey(root, each_name, 0, winreg.KEY_READ) as each_key:
                                displayname, _ = winreg.QueryValueEx(each_key, 'DisplayName')
                                install_loc, _ = winreg.QueryValueEx(each_key, 'InstallLocation')
                                display_var, _ = winreg.QueryValueEx(each_key, 'DisplayVersion')
                        except OSError:
                            # Entry lacks one of the values or cannot be opened.
                            continue
                        yield displayname, install_loc, display_var
            except OSError:
                # The uninstall key itself is missing or unreadable for this root.
                return

        for key in sub_key:
            for root in [winreg.HKEY_LOCAL_MACHINE, winreg.HKEY_CURRENT_USER]:
                for name, local, var in get_install_list(key, root):
                    if name == 'Google Chrome':
                        return os.path.join(local, 'chrome.exe')
        return None

    if not chrome:
        chrome = get_win_chrome_path()  # try to find chrome.exe automatically

    # Explicit locations win; otherwise search PATH for the absolute path.
    chrome_path = chrome if chrome else shutil.which('chrome')
    driver_path = driver if driver else shutil.which('chromedriver')
    assert chrome_path, "pls set chrome.exe path in env or set chrome=$abs_path(chrome.exe)."
    assert driver_path, "pls set chromedriver.exe path in env or set driver=$abs_path(chromedriver.exe)."

    # Temporary Chrome profile location, so the everyday Chrome profile is not
    # damaged. Testing also showed that removing the previous temporary profile
    # makes start-up much faster.
    home = os.environ.get('HOME')
    if not home:
        home_drive = os.environ.get('HOMEDRIVE')
        home_dir = os.environ.get('HOMEPATH')
        if home_drive is not None and home_dir is not None:
            home = home_drive + home_dir
        else:
            # Previously a TypeError (None + str) when either variable was unset.
            home = os.path.expanduser('~')
    profile_dir = os.path.join(home, 'auto_selenium', 'AutomationProfile')
    cache_path = os.path.split(profile_dir)[0]
    if os.path.isdir(cache_path):
        shutil.rmtree(cache_path)

    chrome_exe = '''"{}" --remote-debugging-port={} --user-data-dir="{}"'''.format(
        chrome_path, remote_port, profile_dir)
    if proxy_port:
        # Route the browser through a local proxy.
        chrome_exe += ''' --proxy-server=http://127.0.0.1:{}'''.format(proxy_port)
    subprocess.Popen(chrome_exe)

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option(
        "debuggerAddress", "127.0.0.1:{}".format(remote_port))

    # Deals with fingerprint detection based on the
    # document.$cdc_asdjflasutopfhvcZLmcfl_ property.
    def check_magic_word(driver_path, rollback=False):
        """Swap the marker bytes in the chromedriver binary (or swap them back)."""
        stock = b'$cdc_asdjflasutopfhvcZLmcfl_'
        patched = b'$pqp_nfqwsynfhgbcsuipMYzpsy_'
        mgc_o, mgc_t = (patched, stock) if rollback else (stock, patched)
        with open(driver_path, 'rb') as f:
            filebit = f.read()
        # Only rewrite the file when there is something to replace.
        if mgc_o in filebit:
            with open(driver_path, 'wb') as f:
                f.write(filebit.replace(mgc_o, mgc_t))

    check_magic_word(driver_path, rollback=False)

    # Start the webdriver.
    browser = webdriver.Chrome(options=chrome_options,
                               executable_path=driver_path)
    # Cap every page load at n seconds so a page cannot load forever.
    browser.set_page_load_timeout(5)

    _bak_get = browser.get

    def get(url):
        """Load *url*, trying again every time the page load times out."""
        # A loop instead of recursion: a page that keeps timing out can no
        # longer end in RecursionError.
        while True:
            try:
                _bak_get(url)
            except TimeoutException:
                print('selenium.common.exceptions.TimeoutException: {}'.format(url))
            else:
                return

    browser.get = get

    # When the browser window is closed directly, shut chromedriver.exe down as
    # well so no process is left behind.
    def hook_close_window():
        """Poll the driver log and quit the driver once DevTools is gone."""
        marker = 'Unable to evaluate script: disconnected: not connected to DevTools'
        while True:
            time.sleep(.3)  # check every 0.3 seconds
            try:
                driver_logs = browser.get_log('driver')
            except Exception:
                # Best effort: an unreadable log simply means "nothing new".
                driver_logs = []
            # ``or ''`` guards against entries without a message (was TypeError).
            if any(marker in (entry.get('message') or '') for entry in driver_logs):
                browser.quit()
                return

    threading.Thread(target=hook_close_window).start()
    return browser