def getdata(url, base_url):
    """Open a mobile-emulated headless Chrome on ``url``, click through two
    banner links, scroll the feed five times and return the collected
    article ids (``data-item-id`` attributes).

    NOTE(review): ``base_url`` is accepted but never used in this body —
    confirm with callers whether it can be dropped.
    """
    option = ChromeOptions()
    # Hide the "controlled by automated software" infobar.
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Present a mobile (Android / Nexus 5) user agent.
    option.add_argument(
        'user-agent="Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36"'
    )
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')  # headless browser
    driver = Chrome(options=option)
    driver.get(url)
    driver.implicitly_wait(3)
    driver.find_element_by_xpath('//*[@id="promoBannerIndex"]/a[2]').click()
    time.sleep(3)
    driver.find_element_by_xpath(
        '//*[@id="indexContainer"]/div/div[1]/div[2]/a[3]').click()
    time.sleep(3)
    # time.sleep(3000)
    for i in range(5):
        # Scroll to the bottom to trigger lazy loading, then wait.
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        # print("scrolled down page", i + 1)
        time.sleep(2)
    tree = etree.HTML(driver.page_source)  # parse the rendered page
    # Collect the article ids.
    gid_lists = tree.xpath(
        '//div[@class="list_content"]/section/@data-item-id')
    # print("number of articles:", len(gid_lists))
    driver.quit()
    return gid_lists
def create_download_dir_capabilities_for_chrome(path_to_download, **extensions_files):
    """
    Build Chrome desired capabilities whose default download directory is
    ``path_to_download`` (validated/created first) with web security
    disabled, optionally loading extensions.

    Example use
    | ${capabilities} | create_download_dir_capabilities_for_chrome | Artifacts |
    | Open Browser Extension | https://support.spatialkey.com/spatialkey-sample-csv-data/ | gc | desired_capabilities=${capabilities} |
    | Click Element | //a[contains(@href,'sample.csv.zip')] |
    """
    # Ensure the artifacts directory exists and get its resolved path.
    path_to_download_check = validate_create_artifacts_dir(
        path_to_download)
    chrome_options = ChromeOptions()
    # NOTE(review): "directory_upgrade" is the string "true"; Chrome prefs
    # usually take a boolean — confirm whether True was intended.
    prefs = {
        "download.default_directory": path_to_download_check,
        "directory_upgrade": "true"
    }
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument("--disable-web-security")
    # NOTE(review): iterating a **kwargs dict yields its KEYS (the keyword
    # names), not the values — verify this matches how callers pass
    # extension paths.
    for single_extension in extensions_files:
        chrome_options.add_extension(single_extension)
    logger.info("Chrome Capabilities set download dir '" +
                path_to_download_check + "'")
    return chrome_options.to_capabilities()
def login():
    """Log in to the router web UI at 192.168.254.254 using the stored
    account file, then hand the logged-in browser to ``send_message``."""
    # Create the credentials file on first run.
    if (not accountFileExists()):
        createFile()
    account = getAccountValues()
    usernameStr = account['username']
    passwordStr = account['password']
    opts = ChromeOptions()
    # Keep the browser window open after this script exits.
    opts.add_experimental_option('detach', True)
    browser = Chrome(executable_path='chromedriver', chrome_options=opts)
    browser.get('http://192.168.254.254/html/overview.html')
    loginBtn = browser.find_element_by_id('logout_span')
    loginBtn.click()
    username = browser.find_element_by_id('username')
    username.send_keys(usernameStr)
    password = browser.find_element_by_id('password')
    password.send_keys(passwordStr)
    popLogin = browser.find_element_by_id('pop_login')
    popLogin.click()
    # sends a message
    send_message(browser)
def main(job_name):
    """Restore saved Boss Zhipin cookies into a headless Chrome, search
    for ``job_name`` and keep paging through results until any step fails.
    """
    with open('boss_cookies.json', 'r') as file:
        data = file.read()
    cookies = json.loads(data)
    # Avoid being detected as an automated browser.
    options = ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_argument('--headless')
    browser = webdriver.Chrome(options=options)
    browser.get('https://www.zhipin.com/')
    for i in cookies:
        browser.add_cookie(i)
    browser.refresh()  # reload so the injected cookies take effect
    while True:
        try:
            inquire_job(browser, job_name)
            time.sleep(1)
            get_job_items(browser)
            # save_to_json(results)
            next_page_url = get_next_page(browser)
            # Random delay to look less bot-like.
            time.sleep(random.uniform(1, 10))
            browser.get(next_page_url)
        except Exception:
            # No next page (or any scraping error) ends the crawl.
            break
    browser.close()
def __init__(self):
    """Initialise the crawler: file logging, redis endpoint registration,
    account lookup, local heartbeat thread and a Chrome instance (behind
    a virtual display on Linux)."""
    log.start(logfile=time.strftime("log/%Y%m%d%H%M%S")+".log",logstdout=False)
    log.msg("initiating crawler...",level=log.INFO)
    self.crawler_id = self.get_crawler_id()
    log.msg("crawler id is %s" % self.crawler_id,level=log.INFO)
    # Register this crawler's network endpoints in the shared redis.
    self.r.set('crawler:ip:%s' % self.crawler_id,utils.get_external_ip())
    self.r.set('crawler:port:%s' % self.crawler_id,settings.REDIS_LOCAL_PORT)
    self.r.set('crawler:mapping_port:%s' % self.crawler_id,settings.REDIS_LOCAL_MAPPING_PORT)
    log.msg("crawler ip is %s, port is %d" % (utils.get_external_ip(),settings.REDIS_LOCAL_PORT),level=log.INFO)
    account = self.get_account()
    self.username = account[0]
    self.password = account[1]
    log.msg("crawler account got",level=log.INFO)
    # Mark this crawler healthy in the local redis and stamp the time.
    self.r_local.set('crawler:status:%s' % self.crawler_id, 'good')
    self.r_local.set('crawler:update_time:%s' % self.crawler_id, datetime.datetime.utcnow().strftime("%s"))
    log.msg("local crawler status set",level=log.INFO)
    # Background thread that keeps the status/update_time keys fresh.
    heartbeat_thread = threading.Thread(target=self.maintain_local_heartbeat)
    heartbeat_thread.start()
    log.msg("local crawler heartbeat started",level=log.INFO)
    if platform.system() == "Linux":
        # on linux, use virtual display
        vdisplay = Xvfb()
        vdisplay.start()
    co = ChromeOptions()
    # TODO: Disable image after log in
    # TODO: optimize memory usage
    co.add_experimental_option("prefs",{"profile.default_content_settings":{"popups":1}})
    # co.add_experimental_option("prefs",{"profile.default_content_settings":{"popups":1,"images":2,"media":2}})
    self.driver = webdriver.Chrome(chrome_options=co)
    self.driver.set_window_size(640,960)
def _config_browser(self):
    """Create and return a detached Chrome using the managed driver dir."""
    chrome_opts = ChromeOptions()
    # Keep the browser window alive after the script finishes.
    chrome_opts.add_experimental_option("detach", True)
    location = self._get_driver_dir()
    self._logger.info(f"using {location}")
    return Chrome(location, chrome_options=chrome_opts)
def __init__(self):
    """Open cstimer.net in Chrome and make sure the scramble-image panel
    is visible; abort if the image output directory is not empty."""
    # Check whether the img directory is empty; abort otherwise.
    if os.listdir(self.IMG_PATH):
        logger.error(f"{self.IMG_PATH}目录不为空")
        sys.exit()
    options = ChromeOptions()
    options.add_experimental_option('excludeSwitches',
                                    ['enable-automation'])  # hide automation banner
    # options.add_argument("--headless")  # uncomment to hide the browser window
    # Load the site.
    self.bro = webdriver.Chrome(
        executable_path="./chromedriver_win32/chromedriver.exe",
        options=options)
    self.bro.get(url="https://www.cstimer.net/")
    # Wait (up to 60s) until the page has finished loading.
    WebDriverWait(self.bro, 60).until(
        EC.presence_of_element_located((
            By.XPATH,
            '//div[@id="leftbar"]/div[@class="mybutton c6"]/div/span[2]')))
    # If the image panel is hidden, click to show it.
    if not self.bro.find_element_by_id('toolsDiv').is_displayed():
        self.bro.find_element_by_xpath(
            '//div[@id="leftbar"]/div[@class="mybutton c6"]/div/span[2]'
        ).click()
def scrape(request):
    """Scrape health headlines from pajhwok.com, classify each with the
    pickled CountVectorizer / TF-IDF / model pipeline and persist them as
    ``News`` rows.

    Returns:
        the rendered ``index.html`` with a success message.

    Fixes vs. original: the three pickle artefacts were reloaded from disk
    on EVERY loop iteration (loop-invariant work), and the browser was
    never released (resource leak).
    """
    link = "https://www.pajhwok.com/en/health"
    chrome_options = ChromeOptions()
    # Skip image loading for speed.
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    driver = Chrome("D:/chromedriver_win32/chromedriver.exe",
                    chrome_options=chrome_options)
    try:
        driver.maximize_window()
        driver.get(link)
        headlines = [
            headline.text for headline in driver.find_elements_by_xpath(
                "//h2[@class='node-title']/a")
        ]
    finally:
        # Always release the browser, even if scraping fails.
        driver.quit()
    # Load the classification artefacts once — they do not change per headline.
    loaded_vec = CountVectorizer(
        vocabulary=pickle.load(open("features.pkl", "rb")))
    loaded_tfidf = pickle.load(open("tfidf.pkl", "rb"))
    loaded_model = pickle.load(open("model.pkl", "rb"))
    for headline in headlines:
        X_new_counts = loaded_vec.transform([headline])
        X_new_tfidf = loaded_tfidf.transform(X_new_counts)
        predicted = loaded_model.predict(X_new_tfidf)
        news = News()
        news.headline = headline
        news.predicted_category = predicted
        news.save()
    return render(
        request, 'index.html',
        context={'message': 'The Site Has Been Successfully Scraped'})
def detailpojie(detailUrl):
    """Poll ``detailUrl`` through the configured proxy until the blocking
    element disappears, then quit the browser and return.

    While '//*[@id="root"]/div' is present the page is retried every 30s;
    once it is gone the function waits 3s, quits and returns.

    Fixes vs. original: the bare ``except:`` (which also swallowed
    KeyboardInterrupt/SystemExit) is narrowed to ``except Exception``, and
    a dangling unterminated ``'''`` of commented-out code is removed.
    """
    options = ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    desired_capabilities = options.to_capabilities()
    desired_capabilities['acceptSslCerts'] = True
    desired_capabilities['acceptInsecureCerts'] = True
    # Route all traffic through the manual proxy.
    desired_capabilities['proxy'] = {
        "httpProxy": PROXY,
        "ftpProxy": PROXY,
        "sslProxy": PROXY,
        "noProxy": None,
        "proxyType": "MANUAL",
        "class": "org.openqa.selenium.Proxy",
        "autodetect": False,
    }
    driver = webdriver.Chrome(
        executable_path="/Users/echo/Documents/sites/wxhub-master/chromedriver",
        options=options,
        desired_capabilities=desired_capabilities)
    driver.get(detailUrl)
    while True:
        try:
            driver.find_element_by_xpath('//*[@id="body"]')
            try:
                # Blocking element still present: wait and retry.
                driver.find_element_by_xpath('//*[@id="root"]/div')
                time.sleep(30)
                continue
            except Exception:
                # Blocking element gone: settle, clean up and finish.
                time.sleep(3)
                driver.quit()
                return
        except Exception:
            # Page body not found yet: wait and retry.
            time.sleep(30)
            continue
def _init_selenium_driver_chrome(self, uagent, proxy_server):
    """
    Initializes and returns the selenium webdriver, using the Chrome binary.

    ``proxy_server`` is a falsy value or an (address, port, protocol)
    triple; ``uagent`` is an optional user-agent string.
    """
    options = ChromeOptions()
    # Block image loading and notification prompts.
    prefs = {"profile.managed_default_content_settings.images": 2,
             "profile.default_content_setting_values.notifications": 2}
    options.add_experimental_option("prefs", prefs)
    # options.add_argument("--headless")
    options.add_argument("start-maximized")
    if proxy_server:
        proxy_address = proxy_server[0]
        proxy_port = str(proxy_server[1])
        proxy_protocol = proxy_server[2]
        print("--proxy-server=" + proxy_protocol + "://" + proxy_address + ":" + proxy_port)
        options.add_argument("--proxy-server=" + proxy_protocol + "://" + proxy_address + ":" + proxy_port)
    if uagent:
        options.add_argument("--user-agent=" + uagent)
        print("--user-agent=" + uagent)
    self.driver = webdriver.Chrome("chromedriver.exe", chrome_options=options)
    self.driver.set_page_load_timeout(120)
    return self.driver
def Method2(self):
    """Open Bing plus six article tabs via ``window.open`` and print the
    title of every open window.

    Fixes vs. original: the final loop always switched to ``handles[2]``
    and printed the same title ``size`` times; it now visits each handle.
    The six near-identical js1..js6 strings are folded into one loop.
    """
    driverlocation = "C:\\Users\\jzo_0\\PycharmProjects\\chromedriver.exe"
    os.environ["webdriver.chrome.driver"] = driverlocation
    opts = ChromeOptions()
    opts.add_experimental_option("detach", True)  # keep the browser open
    browser = webdriver.Chrome(driverlocation, chrome_options=opts)
    browser.get('http://bing.com')
    # One window.open call per article URL (last URL intentionally repeated,
    # as in the original).
    urls = [
        "https://www.edureka.co/blog/selenium-framework-data-keyword-hybrid-frameworks",
        "https://www.softwaretestingmaterial.com/data-driven-framework-selenium-webdriver/",
        "https://www.softwaretestinghelp.com/data-driven-framework-selenium-apache-poi/",
        "https://www.toolsqa.com/selenium-webdriver/data-driven-testing-excel-poi/",
        "https://www.toolsqa.com/selenium-webdriver/data-driven-framework/",
        "https://www.toolsqa.com/selenium-webdriver/data-driven-framework/",
    ]
    for url in urls:
        browser.execute_script('window.open("' + url + '","_blank");')
    handles = browser.window_handles
    # BUG FIX: iterate every window handle instead of handles[2] each time.
    for handle in handles:
        browser.switch_to.window(handle)
        print(browser.title)
def get_options(*, headless=False, user_data_dir=None, downloads_dir=None) -> ChromeOptions:
    """Creates ChromeOptions with some additional values"""
    chrome_opts = ChromeOptions()
    # Baseline flags applied to every session.
    for flag in ("--disable-notifications", "--disable-audio-output",
                 "--start-maximized", "--window-size=1920,1080"):
        chrome_opts.add_argument(flag)
    if headless:
        chrome_opts.add_argument("--headless")
    if user_data_dir:
        profile_path = Path(user_data_dir).absolute()
        if profile_path.exists():  # pragma: no cover
            _log.debug("Using existing user-data-dir at: %s", str(profile_path))
        else:
            _log.debug("Creating user-data-dir at: %s", str(profile_path))
            profile_path.mkdir(parents=True, exist_ok=True)
        chrome_opts.add_argument(f"user-data-dir={str(profile_path)}")
    if downloads_dir:
        dl_path = Path(downloads_dir)
        if dl_path.exists():  # pragma: no cover
            _log.debug("Using existing downloads-dir at: %s", str(dl_path))
        else:
            _log.debug("Creating downloads-dir at: %s", str(dl_path))
            dl_path.mkdir(parents=True, exist_ok=True)
        # Silence popups and redirect downloads into the requested dir.
        chrome_opts.add_experimental_option(
            "prefs",
            {"profile.default_content_settings.popups": 0,
             "download.default_directory": str(dl_path)})
    return chrome_opts
def test_baidu(browser, search):
    """Open the requested browser and run a Baidu search for ``search``.

    ``browser`` must be one of "edge", "chrome", "ff".

    Fixes vs. original: an unknown ``browser`` value fell through and
    crashed with ``UnboundLocalError`` on ``driver``; it now raises
    ``ValueError`` with the original message. The commented-out dispatch
    experiment is removed.
    """
    print('start %s' % ctime())
    print('browser %s' % browser)
    err_msg = "browser參數有誤: 只能為edge, chrome, ff"
    opts = ChromeOptions()
    # Keep chrome/chromedriver open after the test finishes
    # (see https://stackoverflow.com/questions/43612340).
    opts.add_experimental_option("detach", True)
    if browser == "edge":
        driver = webdriver.Edge()
    elif browser == "chrome":
        driver = webdriver.Chrome(chrome_options=opts)
    elif browser == "ff":
        driver = webdriver.Firefox()
    else:
        # Fail fast with a clear message instead of UnboundLocalError.
        print(err_msg)
        raise ValueError(err_msg)
    driver.get("http://www.baidu.com")
    driver.find_element_by_id("kw").send_keys(search)
    driver.find_element_by_id("su").click()
def BrowserSetUp(context):
    """
    Create the webdriver for the browser chosen on the command line.

    parameter: --browser chrome (or firefox)

    Yields the driver so the behave framework can clean it up afterwards.
    """
    print("Running browser setUp")
    if context.config.userdata['browser'] == 'firefox':
        print("Tests will be executed on Firefox")
        context.driver = webdriver.Firefox()
    elif context.config.userdata['browser'] == 'chrome':
        options = ChromeOptions()
        # Auto-download into the project temp dir without prompting.
        options.add_experimental_option(
            "prefs", {
                "download.default_directory":
                "C:\\Users\\malencar\\Documents\\Projetos\\everis-python-bdd-automation\\temp",
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "safebrowsing.enabled": False
            })
        print("Tests will be executed on Chrome")
        context.driver = webdriver.Chrome(os.path.join("framework", "chromedriver.exe"),
                                          chrome_options=options)
    # NOTE(review): any other --browser value leaves context.driver unset
    # and the lines below will raise AttributeError.
    context.driver.maximize_window()
    context.driver.implicitly_wait(10)
    yield context.driver
def passwordValid(self):
    """Fill the guru99 demo login form with admin/asdfg456, submit it and
    accept the resulting JS alert if one appears within 3 seconds."""
    driverlocation = "C:\\Users\\jzo_0\\PycharmProjects\\chromedriver.exe"
    os.environ["webdriver.chrome.driver"] = driverlocation
    opts = ChromeOptions()
    opts.add_experimental_option("detach", True)  # keep browser open afterwards
    driver = webdriver.Chrome(driverlocation, chrome_options=opts)
    url1 = "https://www.guru99.com/creating-keyword-hybrid-frameworks-with-selenium.html"
    url2 = ""
    driver.get(url1)
    # XPaths of the demo form controls.
    username_1 = '//*[@id="usrname"]'
    password_1 = '//*[@id="psw"]'
    submit_1 = '/html/body/div/form/input[3]'
    time.sleep(1)
    elm_username = driver.find_element_by_xpath(username_1)
    time.sleep(1)
    elm_password = driver.find_element_by_xpath(password_1)
    elm_submit = driver.find_element_by_xpath(submit_1)
    elm_username.click()
    elm_username.send_keys('admin')
    elm_password.click()
    elm_password.send_keys('asdfg456')
    elm_submit.click()
    try:
        WebDriverWait(driver, 3).until(
            EC.alert_is_present(),
            'Timed out waiting for PA creation ' +
            'confirmation popup to appear.')
        alert = driver.switch_to.alert
        alert.accept()
        print("alert accepted")
    except TimeoutException:
        print("no alert")
def driver_factory(browser, executor):
    """Create an event-firing Remote Chrome on ``executor``:4444, or a
    local headless Firefox; raise for any other ``browser`` value."""
    if browser == "chrome":
        logger = logging.getLogger('chrome_fixture')
        logger.setLevel(LOG_LEVEL)
        options = ChromeOptions()
        options.headless = True
        options.add_argument('--ignore-ssl-errors=yes')
        options.add_argument('--ignore-certificate-errors')
        logger.info("Подготовка среды для запуска тестов...")
        # Use the legacy (non-W3C) protocol.
        options.add_experimental_option('w3c', False)
        driver = EventFiringWebDriver(
            webdriver.Remote(command_executor=f"http://{executor}:4444/wd/hub",
                             desired_capabilities={
                                 "browserName": browser,
                                 "platform": "WIN10",
                                 "platformName": "WIN10"
                             },
                             options=options), MyListener())
        logger.debug(
            "Браузер Chrome запущен со следующими desired_capabilities:{}".
            format(driver.desired_capabilities))
    elif browser == "firefox":
        profile = FirefoxProfile()
        profile.accept_untrusted_certs = True
        options = FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options, firefox_profile=profile)
    else:
        raise Exception("Driver not supported")
    return driver
def deneed_weixin(config):
    """Wait until ``config['target_time']`` and then submit the wjx.cn
    survey once for every user in ``config['users']``."""
    url = "https://www.wjx.cn/m/" + config['wjx_id'][-13:]
    target_time = config['target_time']
    useragent = config['user-agent']
    option = ChromeOptions()
    option.add_argument(f'--user-agent={useragent}')
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Uncomment the two lines below to run without a visible window.
    # chrome_options.add_argument('--headless')
    # chrome_options.add_argument('--disable-gpu')
    browser = webdriver.Chrome(executable_path='chromedriver.exe',
                               options=option)
    target_time = mktime(time.strptime(target_time, "%Y-%m-%d %H:%M:%S"))
    for i, info in enumerate(config['users']):
        # Busy-wait (1s granularity) until the scheduled time, then load.
        while (True):
            if (time.time() > target_time):
                browser.get(url)
                break
            else:
                print(f'时间: {ctime()}====等待中')
                time.sleep(1)
        print(f'时间: {ctime()}===={info["name"]}开始提交')
        questions = browser.find_elements_by_css_selector('.field')
        print(questions)
        # Refresh until the question fields actually render.
        while (len(questions) == 0):
            time.sleep(0.5)
            browser.refresh()
            questions = browser.find_elements_by_css_selector('.field')
            print(questions)
        submit(browser, questions, info).post(info)
        print(f'时间: {ctime()}====任务完成')
def __init__(self):
    """Start a Chrome webdriver whose downloads go to ``./download`` next
    to this file, picking the chromedriver binary for the current OS.

    Fixes vs. original: ``os.name`` is ``'posix'`` on BOTH Linux and
    macOS (it is never ``'mac'``), so the macOS branch was unreachable
    and macOS would get the Linux binary. ``sys.platform`` distinguishes
    them ('linux', 'win32'/'cygwin', 'darwin').
    """
    import sys  # local import: only needed for the platform check
    self._download_dir = join(dirname(realpath(__file__)), 'download')
    self._wait_time = self.DEFAULT_WAIT_TIME
    print(self._wait_time)
    options = ChromeOptions()
    options.add_experimental_option(
        'prefs', {'download.default_directory': self._download_dir})
    if sys.platform.startswith('linux'):
        executable_name = 'chromedriver_linux64'
    elif sys.platform in ('win32', 'cygwin'):
        executable_name = 'chromedriver_windows32.exe'
    elif sys.platform == 'darwin':
        executable_name = 'chromedriver_mac'
    else:
        raise Exception("Unsupported OS '%s'" % os.name)
    super().__init__(join('webdriver', executable_name),
                     chrome_options=options)
    self.implicitly_wait(self._wait_time)
    self.maximize_window()
def tf_Type_driver_scroller(Chromedriver):
    """Scroll the page viewport-by-viewport down to the bottom, then jump
    back to the top.

    NOTE(review): the ``Chromedriver`` argument is immediately replaced by
    a freshly created browser — confirm whether the parameter is needed.
    """
    opts = ChromeOptions()
    opts.add_experimental_option("detach", True)
    Chromedriver = webdriver.Chrome(Chrome_path, options=opts)
    total_height = Chromedriver.execute_script(
        'return document.body.parentNode.scrollHeight')
    viewport_height = Chromedriver.execute_script(
        'return window.innerHeight')
    total_width = Chromedriver.execute_script(
        'return document.body.offsetWidth')
    viewport_width = Chromedriver.execute_script(
        "return document.body.clientWidth")
    # this implementation assume (viewport_width == total_width)
    assert (viewport_width == total_width)
    # scroll the page, take screenshots and save screenshots to slices
    offset = 0  # height
    while offset < total_height:
        if offset + viewport_height > total_height:
            # Clamp the last step so the final view ends exactly at the bottom.
            offset = total_height - viewport_height
        Chromedriver.execute_script('window.scrollTo({0}, {1})'.format(
            0, offset))
        time.sleep(Wait_1)
        offset = offset + viewport_height
        # Short pages may grow while scrolling (lazy loading); re-measure
        # and extend the loop if the document height changed.
        if total_height < 10000:
            update_total_height = Chromedriver.execute_script(
                'return document.body.parentNode.scrollHeight')
            if total_height != update_total_height:
                total_height = update_total_height
    Chromedriver.execute_script('window.scrollTo({0}, {1})'.format(0, 0))
def tf_source_code(driver):
    """Save a prettified copy of the current page source under
    ``sourcepage/<sanitised title>.html``.

    NOTE(review): the ``driver`` argument is immediately replaced by a new
    browser, and the code assigns a LOCAL ``tf_File_name`` but then reads
    the class attribute ``FacebookLogin.tf_File_name`` — confirm which
    was intended; the local assignment is otherwise unused.
    """
    path = FacebookLogin.tf_check_folder_path("sourcepage")
    opts = ChromeOptions()
    opts.add_experimental_option("detach", True)
    driver = webdriver.Chrome(Chrome_path, options=opts)
    title = driver.title
    if title != "":
        title_length = len(str(title))
        if title_length > 26:
            # Strip filesystem-hostile characters, then cap at 25 chars.
            title = title.replace("@", "").replace("/", "").replace(
                "$", "").replace(".", "").replace(":", "").replace("|", "")
            title = str(title)[0:25]
        else:
            title = title.replace("@", "").replace("/", "").replace(
                "$", "").replace(".", "").replace(":", "").replace("|", "")
    else:
        # Untitled page: fall back to the (sanitised) URL as the file name.
        title = driver.current_url
        title = title.replace("@", "").replace("/", "").replace(
            "$", "").replace(".", "").replace(":", "").replace("|", "")
    tf_File_name = path + "/" + title + ".html"
    # Rename any pre-existing file rather than overwrite it.
    if os.path.exists(FacebookLogin.tf_File_name):
        FacebookLogin.tf_check_and_rename(FacebookLogin.tf_File_name)
    pagesource = driver.page_source.encode('ascii', 'ignore')
    soup = BeautifulSoup(pagesource, 'html.parser')
    # Create text file, then write page source to the file
    fh = open(FacebookLogin.tf_File_name, 'w')
    fh.write(str(soup.prettify()))
    fh.close()
def start_browser(link,cookies):
    """Launch a stealth Chrome (navigator.webdriver hidden via a CDP
    script), load ``link``, inject ``cookies`` and reload the page so the
    cookies take effect."""
    caps = DesiredCapabilities().CHROME
    # Return from get() as soon as the DOM is interactive.
    caps["pageLoadStrategy"] = "eager"
    chrome_options = ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option("useAutomationExtension", False)
    driver = Chrome(desired_capabilities=caps, executable_path=driver_path,
                    options=chrome_options)
    # Proxy window.navigator on every new document so the 'webdriver'
    # property is invisible to detection scripts.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": """
            Object.defineProperty(window, 'navigator', {
                value: new Proxy(navigator, {
                    has: (target, key) => (key === 'webdriver' ? false : key in target),
                    get: (target, key) =>
                        key === 'webdriver' ?
                        undefined :
                        typeof target[key] === 'function' ?
                        target[key].bind(target) :
                        target[key]
                })
            })
            """
        },
    )
    driver.get(link)
    for cookie in cookies:
        driver.add_cookie({
            "name": cookie["name"],
            "value" : cookie["value"],
            "domain" : cookie["domain"]
        })
    # Reload with the cookies applied.
    driver.get(link)
def driver_factory(browser):
    """Create an event-firing local Chrome (capturing performance and
    browser console logs) or a headless Firefox; raise otherwise."""
    if browser == "chrome":
        logger = logging.getLogger('chrome_fixture')
        logger.setLevel(LOG_LEVEL)
        options = ChromeOptions()
        options.headless = True
        options.add_argument('--ignore-ssl-errors=yes')
        options.add_argument('--ignore-certificate-errors')
        logger.info("Подготовка среды для запуска тестов...")
        # Collect performance + browser console logs.
        caps = DesiredCapabilities.CHROME
        caps['loggingPrefs'] = {'performance': 'ALL', 'browser': 'ALL'}
        # Legacy (non-W3C) protocol so loggingPrefs is honoured.
        options.add_experimental_option('w3c', False)
        driver = EventFiringWebDriver(
            webdriver.Chrome(desired_capabilities=caps, options=options),
            MyListener())
        logger.debug(
            "Браузер Chrome запущен со следующими desired_capabilities:{}".
            format(driver.desired_capabilities))
    elif browser == "firefox":
        profile = FirefoxProfile()
        profile.accept_untrusted_certs = True
        options = FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options, firefox_profile=profile)
    else:
        raise Exception("Driver not supported")
    return driver
def _set_chrome_options(self, options: List) -> ChromeOptions:
    """Static method to create a ChromeOptions class with options.

    Args:
        options: list of options as defined in __init___ method; dict
            entries become "prefs" experimental options, anything else is
            added as a command-line argument.

    Returns:
        instance of ChromeOptions with options defined in options parameter

    Fixes vs. original: ``assert False`` is stripped under ``python -O``
    (the function would then silently return ``None``); the failure is
    now raised explicitly, keeping ``AssertionError`` so existing callers
    still catch the same type.
    """
    log_client = LogsClient(output_file=self.log_output_file,
                            project_dir=project_dir,
                            file_name=file_name,
                            log_run_uuid=self.log_run_uuid)
    log_client.set_msg(log_type="info", log_msg="setting chrome options")
    try:
        chrome_options = ChromeOptions()
        for option in options:
            if isinstance(option, dict):
                chrome_options.add_experimental_option("prefs", option)
            else:
                chrome_options.add_argument(option)
        return chrome_options
    except Exception as e:
        log_client.set_msg(log_type="error",
                           log_msg=f"the following error occurred with args: {e.args}")
        raise AssertionError(
            "breaking code execution, see log file to track error") from e
def setUp(self):
    """Launch a detached Chrome from the bundled chromedriver and prepare
    it for the test (10s implicit wait, maximised window)."""
    driver_path = "/Users/tuanbuic/PycharmProjects/AutomationPractice/resources/chromedriver.exe"
    chrome_options = ChromeOptions()
    # Keep the browser window open after the test run ends.
    chrome_options.add_experimental_option("detach", True)
    self.driver = webdriver.Chrome(driver_path,
                                   chrome_options=chrome_options)
    self.driver.implicitly_wait(10)
    self.driver.maximize_window()
def attach_chrome_browser(self, port: int, alias: Optional[str] = None):
    """Attach to an existing instance of Chrome or Chromium.

    Requires that the browser was started with the command line option
    ``--remote-debugging-port=<port>``, where port is any 4-digit number
    not being used by other applications.

    That port can then be used to connect using this keyword.

    Example:

    | Attach Chrome Browser | port=9222 |
    """
    options = ChromeOptions()
    # Connect to the already-running browser's DevTools endpoint.
    options.add_experimental_option("debuggerAddress", f"localhost:{port:d}")
    create = partial(self._create_webdriver, "Chrome", alias, chrome_options=options)
    try:
        # First try with the webdriver already available locally …
        return create(download=False)
    except Exception:  # pylint: disable=broad-except
        # … then fall back to downloading a matching webdriver.
        self.logger.debug(traceback.format_exc())
        return create(download=True)
def driver_init():
    """Perform the initial Facebook login through a proxied Chrome and
    store the resulting driver in the module-global ``driver``."""
    print('开始模拟首次登陆')
    global driver
    # 2. Simulated login.
    options = ChromeOptions()
    # 2.1. Add the proxy (adjust to this machine's proxy settings).
    options.add_argument('--proxy-server=socks5://localhost:1087')
    # 2.2. Suppress Chrome notification pop-ups.
    prefs = {
        'profile.default_content_setting_values' : {
            'notifications' : 2
        }
    }
    options.add_experimental_option('prefs',prefs)
    # 2.3. Launch the chromedriver browser.
    driver = start_chrome("www.facebook.com", headless=False, options=options)
    # 2.4. Log in to Facebook.
    # username
    email_element = WebDriverWait(driver, timeout=10).until(EC.presence_of_element_located((By.ID, "email")))
    email_element.clear()
    email_element.send_keys(YOUR_EMAIL)
    driver.implicitly_wait(1)
    # password
    password_element = WebDriverWait(driver, timeout=10).until(EC.presence_of_element_located((By.ID, "pass")))
    password_element.clear()
    password_element.send_keys(YOUR_PASSWORD)
    driver.implicitly_wait(1)
    # click
    login_element = WebDriverWait(driver, timeout=10).until(EC.presence_of_element_located((By.ID, "loginbutton")))
    login_element.click()
    time.sleep(1)
    print('完成首次登陆!')
def init_selenium(self, load_cookies=False) -> RemoteWebDriver: options = ChromeOptions() # 自动打开 F12 控制台,方便抓包检查网络请求问题 options.add_argument('--auto-open-devtools-for-tabs') # 同避免 webdriver=True, 在 chrome 低于 79 版本生效 options.add_experimental_option('excludeSwitches', ['enable-automation']) # stackoverflow 上表示避免 TimeoutException 的方法,但似乎没用 options.add_argument("enable-features=NetworkServiceInProcess") browser = webdriver.Chrome(options=options) # 禁止 window.navigator.webdriver = True 被检测到是 webdriver 的爬虫行为 script = ''' Object.defineProperty(navigator, 'webdriver', { get: () => undefined }) ''' browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": script}) # 设置 cookie 前需要先跳转到对应的 domain browser.get(self.homepage) browser.set_page_load_timeout(30) if load_cookies: logger.debug('load cookies') with open(config[self.config_key]['cookie_file']) as f: cookies = json.load(f) for cookie in cookies: try: browser.add_cookie(cookie) except Exception as e: print(cookie) raise e return browser
def __init__(self):
    """Start a hardened headless Chrome (webdriver flag hidden) and set
    up the QQ-ads OAuth login URL plus placeholder credentials."""
    self.path = '/root/.wdm/drivers/chromedriver/80.0.3987.106/linux64/chromedriver'
    option = ChromeOptions()
    option.add_argument('--headless')
    # Block notification prompts.
    prefs = {
        'profile.default_content_setting_values': {
            'notifications': 2
        }
    }
    option.add_experimental_option('prefs', prefs)
    # Container-friendly flags (no sandbox, no /dev/shm, no GPU, no extensions).
    option.add_argument('--no-sandbox')
    option.add_argument('--disable-dev-shm-usage')
    option.add_argument('--disable-extensions')
    option.add_argument('--disable-gpu')
    option.add_argument("--disable-features=VizDisplayCompositor")
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    self.wd = Chrome(options=option, executable_path=self.path)
    # Remove the webdriver flag so automation is not detected.
    self.wd.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            })
            """
        })
    self.wd.set_page_load_timeout(20)
    self.timeout = WebDriverWait(self.wd, 20)
    # QQ OAuth login entry for the e.qq.com ads console.
    self.url = 'https://graph.qq.com/oauth2.0/show?which=Login&display=pc&response_type=code&client_id=101477621&redirect_uri=https%3A%2F%2Fsso.e.qq.com%2Fpassport%3Fsso_redirect_uri%3Dhttps%253A%252F%252Fe.qq.com%252Fads%252F%26service_tag%3D1&scope=get_user_info'
    # Placeholder credentials — replace before use.
    self.users = 'xxx'
    self.passwd = 'xxx'
def Automation(self, url):
    """Open ``url`` in a Chrome whose automation infobar is suppressed,
    storing the driver on ``self.driver``."""
    chrome_opts = ChromeOptions()
    chrome_opts.add_experimental_option('excludeSwitches',
                                        ['enable-automation'])
    self.driver = webdriver.Chrome(options=chrome_opts)
    self.driver.get(str(url))
def captcha_process(url):
    """Open ``url`` and, while Zhihu's captcha page ('安全验证 - 知乎') is
    shown, download the captcha image, OCR it with ``pred`` and submit the
    answer; close the browser once the check passes.

    Fixes vs. original: the bare ``except:`` (which also swallowed
    KeyboardInterrupt/SystemExit) is narrowed to ``except Exception``, and
    the image file is written via a context manager so it is closed even
    on error.
    """
    option = ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome(options=option)
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    # Loop until the page is no longer the captcha page.
    while soup.find('head').text[:9].find('安全验证 - 知乎') != -1:
        try:
            soup = BeautifulSoup(driver.page_source, 'lxml')
            img = soup.find('img', {'class': 'Unhuman-captcha'})['src']
            # Random file name to avoid clashes between worker threads.
            name = './temp/' + str(random.random()) + '.png'
            # Strip the data-URI prefix before decoding.
            imgdata = base64.b64decode(img[22:])
            with open(name, 'wb') as f:
                f.write(imgdata)
            pred.set_dirname(name)
            # Predicted captcha text.
            captcha = pred.pred_whole_captcha()
            driver.find_element_by_xpath(
                '//*[@id="root"]/div/div[2]/section/div/div/input').click()
            driver.find_element_by_xpath(
                '//*[@id="root"]/div/div[2]/section/div/div/input').clear()
            driver.find_element_by_xpath(
                '//*[@id="root"]/div/div[2]/section/div/div/input').send_keys(
                    captcha)
            driver.find_element_by_xpath(
                '//*[@id="root"]/div/div[2]/section/button').click()
        except Exception:
            # Any failure (image missing, stale element, …): retry.
            continue
        soup = BeautifulSoup(driver.page_source, 'lxml')
    driver.close()
def driver(config):
    """Pytest fixture: build a Chrome (with download-dir prefs) or Firefox
    driver via webdriver-manager, open ``config['url']`` and yield it;
    quits the browser on teardown."""
    browser = config['browser']
    version = config['version']
    url = config['url']
    download_dir = config['download_dir']
    if browser == 'chrome':
        options = ChromeOptions()
        options.add_argument("--window-size=800,600")
        prefs = {"download.default_directory": download_dir}
        options.add_experimental_option('prefs', prefs)
        manager = ChromeDriverManager(version=version)
        driver = webdriver.Chrome(
            executable_path=manager.install(),
            options=options,
            desired_capabilities={'acceptInsecureCerts': True})
    elif browser == 'firefox':
        manager = GeckoDriverManager(version=version)
        driver = webdriver.Firefox(executable_path=manager.install())
    else:
        raise UnsupportedBrowserException(f'Unsupported browser: "{browser}"')
    driver.get(url)
    driver.maximize_window()
    yield driver
    # quit = close the page AND stop the browser driver
    # close = close the page; the browser driver binary keeps running
    driver.quit()
def get_chrome(additional_options: ChromeOptions=None) -> webdriver.Chrome:
    """Create a Chrome driver whose downloads land in the project download
    directory.

    Args:
        additional_options: optional pre-built ChromeOptions passed to the
            driver; when omitted the locally built instance is used.

    Returns:
        a ready ``webdriver.Chrome`` instance.

    Fixes vs. original: ``additional_options == None`` replaced with the
    PEP 8 identity check ``is None`` (``==`` can be overridden and is the
    wrong comparison for the None singleton).
    """
    options = ChromeOptions()
    if additional_options is None:
        additional_options = options
    download_option = {'download.default_directory': get_download_file_path(),
                       'download.directory_upgrade': 'true',
                       'download.extensions_to_open': '',
                       }
    options.add_experimental_option('prefs', download_option)
    return webdriver.Chrome(get_chrome_exe_path(),
                            desired_capabilities=options.to_capabilities(),
                            chrome_options=additional_options)
def setup_for_test(self, test):
    """Translate ``test``'s requirements (Flash enabled, optional proxy)
    into Chrome capabilities stored on ``self.capabilities``."""
    chrome_options = ChromeOptions()
    chrome_options.add_argument("test-type")
    chrome_options.add_argument("disable-infobars")
    # Disable the password manager and whitelist the Flash plugin.
    chrome_options.add_experimental_option('prefs', {
        'credentials_enable_service': False,
        'profile.password_manager_enabled': False,
        'profile.default_content_setting_values.plugins': 1,
        'profile.content_settings.plugin_whitelist.adobe-flash-player': 1,
        'profile.content_settings.exceptions.plugins.*,*.per_resource.adobe-flash-player': 1
    })
    if test.use_proxy:
        chrome_options.add_argument("--proxy-server={0}".format(test.proxy_address))
    self.capabilities = chrome_options.to_capabilities()
    logger.debug("Chrome capabilities: {}".format(self.capabilities))
def setUp(self, browser):
    """Create a Firefox or Chrome driver according to ``browser`` and
    instantiate the Home page object on it."""
    self.browser = browser
    if "firefox" in self.browser:
        profile = FirefoxProfile()
        # profile.set_preference("plugin.state.silverlight", 2)
        # profile.set_preference("browser.download.folderList", 1)
        # profile.set_preference("pdfjs.disabled", False);
        # profile.set_preference("pdfjs.firstRun", True);
        self.driver = Firefox(profile)  # get a new firefox session
    if "chrome" in self.browser:
        chromedriver = "/usr/local/bin/chromedriver"
        options = ChromeOptions()
        # Don't pass the disable-component-update switch to Chrome.
        options.add_experimental_option('excludeSwitches', ['disable-component-update'])
        # Persistent profile so browser state survives between runs.
        options.add_argument("--user-data-dir=./browser_resources/chrome_data_dir/")
        os.environ["webdriver.chrome.driver"] = chromedriver
        self.driver = Chrome(executable_path=chromedriver, chrome_options=options)
    self.home_page = home.Home(self.driver)
def create_download_dir_capabilities_for_chrome(path_to_download, **extensions_files):
    """
    Build Chrome desired capabilities whose default download directory is
    ``path_to_download`` (validated/created first) with web security
    disabled, optionally loading extensions.

    Example use
    | ${capabilities} | create_download_dir_capabilities_for_chrome | Artifacts |
    | Open Browser Extension | https://support.spatialkey.com/spatialkey-sample-csv-data/ | gc | desired_capabilities=${capabilities} |
    | Click Element | //a[contains(@href,'sample.csv.zip')] |
    """
    # Ensure the artifacts directory exists and get its resolved path.
    path_to_download_check = validate_create_artifacts_dir(path_to_download)
    chrome_options = ChromeOptions()
    # NOTE(review): "directory_upgrade" is the string "true"; Chrome prefs
    # usually take a boolean — confirm whether True was intended.
    prefs = {"download.default_directory": path_to_download_check,
             "directory_upgrade": "true"}
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.add_argument("--disable-web-security")
    # NOTE(review): iterating a **kwargs dict yields its KEYS (the keyword
    # names), not the values — verify this matches how callers pass
    # extension paths.
    for single_extension in extensions_files:
        chrome_options.add_extension(single_extension)
    logger.info("Chrome Capabilities set download dir '" + path_to_download_check + "'")
    return chrome_options.to_capabilities()