def __init__(self):
    """Start a headless Firefox session, then run the instance set-up.

    Stores the driver on ``self.driver`` and calls ``self.initiate()``.
    """
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    # ``firefox_options=`` is the deprecated spelling of this keyword;
    # ``options=`` is the supported one.
    self.driver = webdriver.Firefox(options=opts)
    self.initiate()
def _open_browser_with_headless(self):
    """
    Open Chrome or Firefox in headless mode.

    The browser name is read from ``self._reader`` (section ``browser``,
    key ``browser``) and lower-cased. An unsupported name, or a
    ``WebDriverException`` raised while starting, is logged and followed
    by ``exit()``.

    :return: the driver object
    """
    driver = None
    browser = self._reader.get_str('browser', 'browser').lower()
    try:
        # Guard clause: anything other than the two known browsers stops here.
        if browser not in ('chrome', 'firefox'):
            self._log.error(f'{browser}配置有误,或{browser}不支持无头模式,请确认!!')
            exit()

        if browser == 'chrome':
            chrome_options = ChromeOptions()
            for flag in ('--headless', '--disable-gpu'):
                chrome_options.add_argument(flag)
            driver = webdriver.Chrome(
                options=chrome_options,
                executable_path=constant.chrome_path)
        else:
            firefox_options = FirefoxOptions()
            for flag in ('--headless', '--disable-gpu'):
                firefox_options.add_argument(flag)
            driver = webdriver.Firefox(
                options=firefox_options,
                executable_path=constant.firefox_path,
                service_log_path=devnull)

        # The version is looked up under 'browserVersion' first and under
        # 'version' when that key is missing.
        try:
            version = driver.capabilities['browserVersion']
        except KeyError:
            version = driver.capabilities['version']
        self._log.info(f'{browser}启动成功,版本号:{version}')
        sleep(1)
        return driver
    except WebDriverException as e:
        self._log.error('{}无头模式启动失败:{}'.format(browser, e))
        exit()
def init_driver(binary_path, binary_type, stop_compression, proxy_add, proxy_port):
    """
    Build a headless Selenium driver. Only Firefox is supported for now.

    Args:
        binary_path(str): the path to the 'firefox' executable
        binary_type(str): for now, binary type can only be 'FirefoxBinary'.
        stop_compression: when truthy, both accept-encoding preferences are
            blanked and the devtools cache is disabled.
        proxy_add: proxy host written to the ftp/http/socks/ssl proxy
            preferences; applied only when ``proxy_port`` is truthy too.
        proxy_port: proxy port written next to ``proxy_add``.

    Returns:
        driver(WebDriver): an initialized Selenium WebDriver, or None when
        ``binary_type`` is anything other than 'FirefoxBinary'.
    """
    if binary_type != 'FirefoxBinary':
        return None

    firefox_binary = FirefoxBinary(binary_path)
    opts = FirefoxOptions()
    opts.add_argument('--headless')

    if stop_compression:
        compression_prefs = (
            ('network.http.accept-encoding', ''),
            ('network.http.accept-encoding.secure', ''),
            ('devtools.cache.disabled', True),
        )
        for pref_name, pref_value in compression_prefs:
            opts.set_preference(pref_name, pref_value)

    if proxy_add and proxy_port:
        # Same host/port pair for every protocol, then manual proxy mode (1).
        for scheme in ('ftp', 'http', 'socks', 'ssl'):
            opts.set_preference('network.proxy.' + scheme, proxy_add)
            opts.set_preference('network.proxy.' + scheme + '_port', proxy_port)
        opts.set_preference('network.proxy.type', 1)

    return webdriver.Firefox(firefox_binary=firefox_binary, options=opts)
def get_browser_capabilities(self, browser_name, headless=False):
    """
    Build the capabilities dictionary for one browser, options included.

    :param browser_name: browser name in lowercase
    :type browser_name: str
    :param headless: run browser without gui
    :type headless: bool
    :return: capabilities for specific browser; ``None`` when
        ``self.is_appium_based`` is truthy
    :rtype: dict
    """
    if self.is_appium_based:
        return None

    if browser_name == 'firefox':
        options = FirefoxOptions()
    elif browser_name == 'chrome':
        options = ChromeOptions()
        options.add_argument('disable-infobars')
    else:
        options = None

    if options and headless:
        options.headless = True

    # Hack: start from the options' capabilities so that the 'platform'
    # value written below is not overwritten by the chrome options.
    if options:
        browser_caps = options.to_capabilities()
    else:
        browser_caps = {}

    # self.browsers holds (name, version) pairs; the first
    # case-insensitive name match wins.
    matches = [
        entry for entry in self.browsers
        if browser_name.lower() == entry[0].lower()
    ]
    browser_name, browser_version = matches[0]
    browser_caps.update({
        'browserName': browser_name,
        'version': browser_version,
        'platform': self.full_name,
    })
    if isinstance(self.extra, dict):
        browser_caps.update(self.extra)
    return browser_caps
def __init__(self, headless=True, options=None, path=r'myengine\geckodriver'):
    """Start Firefox with the given command-line arguments.

    :param headless: value assigned to the options' ``headless`` flag
    :param options: iterable of command-line arguments, added one by one;
        ``None`` (the default) adds none
    :param path: geckodriver location, passed on as ``executable_path``
    """
    # The default path is a raw string: ``\g`` is not a valid escape
    # sequence, so the plain literal triggers an invalid-escape warning.
    browser_options = FirefoxOptions()
    for argument in options or ():
        browser_options.add_argument(argument)
    browser_options.headless = headless
    Firefox.__init__(self, options=browser_options, executable_path=path)
    Browser.__init__(self)
def launch_browser(self, browser_name, url):
    """Launch the requested browser and open ``url`` in it.

    The started driver is stored in the module-level ``driver`` global.
    Recognised names are "chrome", "firefox" and "ie". An unknown name, or a
    ``WebDriverException`` raised while starting the browser, is logged and
    the method returns without navigating.

    :param browser_name: "chrome", "firefox" or "ie"
    :param url: address opened once the browser is up
    """
    global driver
    try:
        if browser_name == "chrome":
            chromeoptions = ChromeOptions()
            chromeoptions.add_argument("start-maximized")
            chromeoptions.add_argument("disable-notifications")
            chromeoptions.add_argument("--ignore-certificate-errors")
            chromeoptions.add_argument("--disable-infobars")
            chromeoptions.add_argument("--disable-extensions")
            driver = webdriver.Chrome(
                executable_path="./drivers/chromedriver.exe",
                options=chromeoptions)
            log.info("chrome browser launch successfully")
        elif browser_name == "firefox":
            firefoxoptions = FirefoxOptions()
            # NOTE(review): "start-maximize" mirrors the Chrome switch above;
            # confirm the target browser actually honours it.
            firefoxoptions.add_argument("start-maximize")
            driver = webdriver.Firefox(
                executable_path="./drivers/geckodriver.exe",
                options=firefoxoptions)
            log.info("firefox browser launch successfully")
        elif browser_name == "ie":
            ieoptions = IeOptions()
            ieoptions.add_argument("start-maximize")
            driver = webdriver.Ie(
                executable_path="./drivers/IEDriverServer.exe",
                options=ieoptions)
            log.info("ie browser launch successfully")
        else:
            log.error("invalid browser name")
            # No browser was started, so there is nothing to navigate.
            return
    except WebDriverException as e:
        # The message needs a placeholder for the extra argument; without
        # one the logging call itself fails while formatting.
        log.error("exception %s", e)
        return
    driver.implicitly_wait(10)
    driver.get(url)
def bake_chapters(start, stop):
    """
    Use Selenium to get the live javascript rendered webpage and then save it

    requires a geckodriver to be somewhere in the PATH

    For every chapter the ``innerHTML`` of the element with class
    ``container`` is written to ``<ESTORIA_LOCATION>/edition/critical/<n>.html``.

    :param start: start with this chapter
    :param stop: stop at this chapter (inclusive)
    """
    task_id = current_task.request.id
    logger.info('{}: bake_chapters task started'.format(task_id))
    logger.debug('{}: Baking chapters: {} to {}'.format(task_id, start, stop))

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    try:
        for i in range(start, stop + 1):
            logger.debug('{}: Bake chapter: {}'.format(task_id, i))
            url = settings.BAKING_WEBPAGES_BASEURL + 'chapter/?chapter={}'.format(i)
            driver.get(url)
            container = driver.find_element_by_class_name(
                'container').get_attribute('innerHTML')
            target = os.path.join(
                settings.ESTORIA_LOCATION, 'edition/critical', str(i) + '.html')
            with open(target, 'w', encoding='utf-8') as f:
                f.write(container)
    finally:
        # Release the browser even when a chapter fails; otherwise every
        # task run leaves a Firefox process behind.
        driver.quit()

    logger.info('{}: complete'.format(task_id))
def launch_application(browser_name, app_url):
    """Launch the requested browser and open ``app_url`` in it.

    The started driver is stored in the module-level ``driver`` global.
    Recognised names are "chrome", "firefox" and "ie". An unknown name, or a
    ``WebDriverException`` raised while starting the browser, is logged and
    the function returns without navigating.

    :param browser_name: "chrome", "firefox" or "ie"
    :param app_url: address opened once the browser is up
    """
    global driver
    log.info("in init method of selenium base")
    try:
        if browser_name == "chrome":
            option = ChromeOptions()
            option.add_argument("start-maximized")
            option.add_argument("--ignore-certificate-errors")
            option.add_argument("--disable-extensions")
            option.add_argument("--disable-infobars")
            option.add_argument("disable-notifications")
            driver = Chrome(executable_path="./drivers/chromedriver.exe",
                            options=option)
            log.info("chrome browser is launch successfully")
        elif browser_name == "firefox":
            profile = FirefoxProfile()
            profile.accept_untrusted_certs = True
            options = FirefoxOptions()
            # NOTE(review): "start-maximized" is the Chrome switch used in
            # the branch above; confirm Firefox honours it.
            options.add_argument("start-maximized")
            # The profile and options used to be built and then dropped;
            # attach the profile to the options and hand both to the driver.
            options.profile = profile
            driver = Firefox(executable_path="./drivers/geckodriver.exe",
                             options=options)
            log.info("firefox browser is launch successfully")
        elif browser_name == "ie":
            driver = Ie(executable_path="./drivers/IEDriverServer.exe")
        else:
            # A placeholder is required for the extra logging argument.
            log.error("browser name is incorrect %s", browser_name)
            return
    except WebDriverException as err:
        # Log the raised instance, not the exception class.
        log.critical("exception %s", err)
        return
    driver.implicitly_wait(5)
    driver.get(app_url)
def __init__(self, position, lit, time):
    """Set up the search state, the detail CSV file and a headless browser.

    :param position: job-title search keyword
    :param lit: education-requirement search keyword
    :param time: work-experience search keyword
    """
    # Search landing page
    self.start_url = 'https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html'
    # Search keywords [position, education requirement, work experience],
    # e.g. accountant / junior college, bachelor, master /
    # fresh graduate, 3-5 years
    self.key_words = [position, lit, time]
    self.df = pd.DataFrame(columns=['职位', '日期', '地点', '网址'])

    # Create (or truncate) the job-detail CSV and write its header row.
    # The header used to be written twice with identical content; once is
    # enough. ``newline=''`` is what the csv module expects, so the writer
    # controls line endings itself.
    detail_path = '职位详情{0}_{1}_{2}.csv'.format(
        self.key_words[0], self.key_words[1], self.key_words[2])
    with open(detail_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['公司简介', '职位名称', '职位信息'])

    # Headless Firefox through webdriver, with a 10 second explicit wait
    options = FirefoxOptions()
    options.add_argument('-headless')
    self.browser = Firefox(options=options)
    self.wait = WebDriverWait(self.browser, 10)
def get_cookie():
    """Open the court-documents list page in Firefox and return its cookies.

    The browser's cookies are cleared, the page is loaded, and after a
    15 second pause every cookie is rendered as ``name=value; ``.

    :return: the concatenated cookie string (it keeps the trailing ``"; "``)
    """
    url = "http://wenshu.court.gov.cn/list/list/?sorttype=1"
    options = FirefoxOptions()
    # NOTE(review): the two arguments below use Chrome's switch syntax
    # (``user-agent=`` / ``--proxy-server=``). Firefox has no such
    # command-line options, so they probably have no effect here. They are
    # left untouched because actually applying the hard-coded proxy would
    # change what this function talks to -- confirm the intent first.
    options.add_argument(
        'user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"'
    )
    proxy = "http://120.83.110.219:9999"
    options.add_argument(f"--proxy-server={proxy}")

    browser = webdriver.Firefox(
        executable_path='D:/Software/Python27/Scripts/geckodriver.exe',
        options=options)
    try:
        browser.delete_all_cookies()
        browser.get(url)
        time.sleep(15)
        cookies = browser.get_cookies()
        print(cookies)
        cookie_str = ''.join(
            cookie['name'] + '=' + cookie['value'] + '; ' for cookie in cookies)
        print(cookie_str)
        return cookie_str
    finally:
        # quit() ends the whole session, also when loading the page failed.
        browser.quit()
def test_passing_firefox_options(self):
    """A headless ``FirefoxOptions`` handed to ``get_webdriver_for`` must
    show up as a truthy ``moz:headless`` capability."""
    headless_options = FirefoxOptions()
    headless_options.add_argument("--headless")
    self.driver = get_webdriver_for("firefox", options=headless_options)
    is_headless = self.driver.capabilities["moz:headless"]
    self.assertTrue(is_headless)
def __init__(self):
    """Configure the target URLs, the output files and a headless browser.

    The browser is started from the Tor Browser's Firefox binary with
    JavaScript disabled and HTTP/SSL traffic sent to ``127.0.0.1:8118``.
    """
    # Switching routes yields several real URLs; list all of them here.
    self.url = [
        "http://deepmixaasic2p6vm6f4d4g52e4ve6t37ejtti4holhhkdsmq3jsf3id.onion",
        "http://deepmixjso4ero6h3psxskkb756offo3uznx4a44vuc5464mjkqwndyd.onion",
    ]
    self.table_file = "data/Table.json"  # stores the list
    self.data_file = "data/Total_detail_Final.json"  # stores all data

    fireFoxOptions = FirefoxOptions()
    # Headless mode (required)
    fireFoxOptions.add_argument("--headless")
    # Browser binary: must be the Firefox bundled with Tor Browser; a
    # regular Firefox gets stuck on the login page.
    binary = FirefoxBinary("/home/student/tor-browser/Browser/firefox")
    # Disable JavaScript
    fireFoxOptions.set_preference("javascript.enabled", False)
    fireFoxOptions.set_preference('network.proxy.type', 1)
    # Proxy host and port for HTTP
    fireFoxOptions.set_preference('network.proxy.http', '127.0.0.1')
    fireFoxOptions.set_preference('network.proxy.http_port', 8118)
    # Same proxy for HTTPS
    fireFoxOptions.set_preference('network.proxy.ssl', '127.0.0.1')
    fireFoxOptions.set_preference('network.proxy.ssl_port', 8118)
    # Start the browser. ``options=`` replaces the deprecated
    # ``firefox_options=`` keyword.
    self.browser = webdriver.Firefox(options=fireFoxOptions,
                                     firefox_binary=binary)
def scrap(to_sql=False):
    """Scrape Nasdaq's most-active table and render it as HTML.

    Each row of the share-volume container becomes a dictionary with the
    keys ``symbol``, ``name``, ``last``, ``change`` and ``volume``.

    :param to_sql: when truthy, every row is also stored as a ``MostActive``
        record and committed
    :return: the rendered ``scraper/index.html`` template
    """
    cell_selector = "td.most-active__cell.most-active__cell--heading"
    columns = ('symbol', 'name', 'last', 'change', 'volume')

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    most_active_list = []
    try:
        most_active_url = "https://www.nasdaq.com/market-activity/most-active"
        driver.get(most_active_url)
        rows = driver.find_element_by_css_selector(
            'div.most-active__data-container--share-volume'
        ).find_elements_by_css_selector("tr.most-active__row")
        for row in rows:
            # One lookup per row instead of one per column.
            cells = row.find_elements_by_css_selector(cell_selector)
            most_active = {
                column: cells[index].text
                for index, column in enumerate(columns)
            }
            if to_sql:
                db.session.add(MostActive(
                    most_active["symbol"],
                    most_active["name"],
                    # the first character is dropped before conversion
                    float(most_active["last"][1:]),
                    float(most_active["change"]),
                    float(most_active["volume"].replace(",", ""))))
                db.session.commit()
            most_active_list.append(most_active)
    finally:
        # All text has been copied out of the page by now; release the
        # browser even when scraping or storing a row failed.
        driver.quit()

    df = pd.DataFrame(most_active_list)
    return render_template('scraper/index.html',
                           tables=[df.to_html(classes='data')],
                           titles=df.columns.values)
def test4():
    """Open Baidu in headless Firefox and print the resulting URL."""
    options = FirefoxOptions()
    options.add_argument('--headless')
    # ``options=`` replaces the deprecated ``firefox_options=`` keyword.
    dr = webdriver.Firefox(options=options)
    try:
        dr.get("https://www.baidu.com")
        print(dr.current_url)
    finally:
        # End the session even when loading the page fails.
        dr.quit()
def driver_open(url, the_encoding="utf-8", timeout=3):
    """Load ``url`` in headless Firefox and parse the rendered document.

    A failed page load (typically the page-load timeout) is tolerated: the
    function waits five seconds and parses whatever the page contains.

    :param url: address to load
    :param the_encoding: encoding applied to the HTML before parsing
    :param timeout: page-load timeout in seconds
    :return: the ``BS`` object built from the document's ``innerHTML``
    """
    from selenium.webdriver import FirefoxOptions
    from selenium import webdriver
    import time

    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    try:
        driver.set_page_load_timeout(timeout)
        try:
            driver.get(url)
        except Exception:
            # Best effort: the load may time out although the scripts have
            # just finished; wait a little and read the document anyway.
            time.sleep(5)
        print("++++++++++++++++++++++++++++++++++++++++")
        print("++++++++++++ run finnaly +++++++++++++++")
        print("++++++++++++++++++++++++++++++++++++++++")
        html2 = driver.execute_script(
            "return document.documentElement.innerHTML;")
        return BS(html2.encode(the_encoding))
    finally:
        # Only clean-up lives here. The old ``return`` inside ``finally``
        # swallowed every exception, and a failing script call leaked the
        # browser.
        driver.quit()
def scrape_video_no_protection(url: str) -> str:
    """
    Gets video url directly from page.

    Raises a NoVideoAvailableException if no player is found

    :param url: page that embeds the player
    :return: the ``src`` attribute of the video element
    """
    # opens a driver on the given url
    options = FirefoxOptions()
    options.add_argument("-headless")
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(url)
        try:
            # clicks play to start video and load video url in the page
            play_button = driver.find_element_by_xpath(
                "//div[@class = '{}']".format(PLAY_BUTTON_CLASS))
            play_button.click()
            # gets video url from page once is loaded
            video_player_element = driver.find_element_by_xpath(
                "//video[@class = '{}']".format(VIDEO_ELEMENT_CLASS))
            return video_player_element.get_attribute('src')
        except NoSuchElementException:
            raise NoVideoAvailableException(NO_VIDEO_MESSAGE)
    finally:
        # closes the driver on every path, including the "no video" one
        driver.quit()
def setUp(self):
    """Start a headless Firefox and, if configured, target a staging server.

    When the ``STAGING_SERVER`` environment variable is set,
    ``self.live_server_url`` is pointed at ``http://<STAGING_SERVER>``.
    """
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    # ``options=`` replaces the deprecated ``firefox_options=`` keyword.
    self.browser = webdriver.Firefox(options=opts)
    staging_server = os.environ.get("STAGING_SERVER")
    if staging_server:
        self.live_server_url = f'http://{staging_server}'
def firefox_browser():
    """Yield a headless Firefox driver; quit it once the consumer resumes us."""
    firefox_opts = FirefoxOptions()
    for flag in ('--headless', '--start-fullscreen'):
        firefox_opts.add_argument(flag)
    browser = webdriver.Firefox(options=firefox_opts)
    yield browser
    browser.quit()
def setUp(self):
    """Create the test database helper and a headless Firefox session.

    The browser gets a 5 second implicit wait.
    """
    self.db = DBCreatorTester()
    headless_opts = FirefoxOptions()
    headless_opts.add_argument('-headless')
    browser = Firefox(options=headless_opts)
    self.selenium = browser
    self.selenium.implicitly_wait(5)
def __init__(self, folder, profile, username, password):
    """Remember the upload settings and start a headless Firefox.

    :param folder: stored as ``self.folder``
    :param profile: name of the profile directory below
        ``/home/aniquetahir/.mozilla/firefox/``
    :param username: stored as ``self.username``
    :param password: stored as ``self.password``
    """
    self.folder = folder
    self.username = username
    self.password = password

    firefox_options = FirefoxOptions()
    firefox_options.add_argument('-headless')
    firefox_profile = FirefoxProfile(
        '/home/aniquetahir/.mozilla/firefox/' + profile)
    # Every argument is passed by keyword, and ``options=`` replaces the
    # deprecated ``firefox_options=`` spelling.
    self.webdriver = Firefox(
        firefox_profile=firefox_profile,
        firefox_binary='/home/aniquetahir/firefox/firefox',
        executable_path='/home/aniquetahir/youtube-upload-folder/geckodriver',
        options=firefox_options)
def setup_browser():
    """Create a headless Firefox with a custom user agent and geolocation on.

    :return: the started driver
    """
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    # ``user-agent=...`` is a Chrome command-line switch. In Firefox the
    # user agent is overridden through a preference instead.
    opts.set_preference(
        "general.useragent.override",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
    )
    # The preference is called ``geo.enabled`` (it was spelled ``geo.enable``).
    opts.set_preference("geo.enabled", True)
    browser = webdriver.Firefox(options=opts)
    return browser
def __init__(self, testConf):
    """Start a headless Firefox and load the JSON test configuration.

    :param testConf: path of the JSON file; its parsed content is stored
        on ``self.testConf``
    """
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    # ``options=`` replaces the deprecated ``firefox_options=`` keyword.
    self.driver = webdriver.Firefox(
        executable_path=GeckoDriverManager().install(),
        options=opts)
    print(testConf)
    with open(testConf) as testConfFp:
        self.testConf = json.load(testConfFp)
def get_search_results(minresults=40):
    """Collect property urls and types from the search result pages.

    Goes through the result pages of new houses and apartments, one page
    of each per iteration, and stops once at least ``minresults`` distinct
    urls were found or 1800 seconds have passed.

    :param minresults: minimum number of results to collect
    :return: a dictionary ``{'url1': True/False, 'url2': True/False, ...}``
        where True means house and False means apartment
    """
    time_budget = 1800  # seconds
    search_results = {}
    result_count = 0
    # set on which page to start the search
    page_number = 1

    options = FirefoxOptions()
    options.add_argument('-headless')
    options.set_preference("dom.webdriver.enabled", False)
    profile = FirefoxProfile('src/scraping/bolzyxyb.heroku')
    profile.set_preference('useAutomationExtension', False)
    # NOTE(review): 'usr/lib/firefox/firefox' is a relative path (no
    # leading slash) -- confirm that this is intended.
    driver = Firefox(firefox_binary='usr/lib/firefox/firefox',
                     options=options, firefox_profile=profile)
    try:
        driver.implicitly_wait(15)
        # start the progress indicator and timeout logic
        start_time = time.monotonic()
        time_spent = 0
        while result_count < minresults and time_spent < time_budget:
            # for each loop, scrape one results page of houses and one of
            # apartments; a url keeps the type it was first seen with
            for houselink in get_page_urls(pagenr=page_number, kind="house", drv=driver):
                search_results.setdefault(houselink, True)
            for apartmentlink in get_page_urls(pagenr=page_number, kind="apartment", drv=driver):
                search_results.setdefault(apartmentlink, False)
            result_count = len(search_results)
            page_number += 1

            # update progress indicator
            time_spent = time.monotonic() - start_time
            if result_count:
                total_time_estimation = 1 / (result_count / minresults) * time_spent
            else:
                # Nothing found yet: no rate to extrapolate from (this used
                # to divide by zero), so assume the whole budget is needed.
                total_time_estimation = time_budget
            capped_time = min(total_time_estimation, time_budget)
            time_remaining = capped_time - time_spent
            print(f"Finishing in {time_remaining/60:.1f} minutes")
    finally:
        # Release the browser also when a page fails to scrape.
        driver.quit()
    print("Finished")
    return search_results
def test_example():
    """Install geckodriver, open otus.ru in kiosk mode and check the title."""
    gdd = GeckoDriverManager()
    gdd.download_and_install()
    option = FirefoxOptions()
    option.add_argument("--kiosk")
    wd = webdriver.Firefox(options=option)
    try:
        wd.get("https://otus.ru/")
        assert wd.title == 'Онлайн‑курсы для профессионалов, дистанционное обучение современным профессиям'
    finally:
        # Close the browser even when the page load or the assertion fails.
        wd.quit()
def __init__(self):
    """Start a headless Firefox with an overridden user agent and open CoWIN.

    The user agent is set through the profile's
    ``general.useragent.override`` preference. After loading the home page
    the constructor sleeps for three seconds.
    """
    headless_options = FirefoxOptions()
    ua_profile = FirefoxProfile()
    ua_profile.set_preference(
        "general.useragent.override",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:88.0) Gecko/20100101 Firefox/88.0",
    )
    headless_options.add_argument("--headless")

    browser = webdriver.Firefox(firefox_profile=ua_profile,
                                options=headless_options)
    self.driver = browser
    self.driver.get("https://www.cowin.gov.in/home")
    time.sleep(3)
def test_empty_reset(self):
    """Typing a blank and pressing reset must leave the input empty."""
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    # Close the browser after the test, whether it passes or fails.
    self.addCleanup(driver.quit)

    driver.get("http://127.0.0.1:8000/verificacion/")
    element = driver.find_element_by_id("id_input")
    element.send_keys(" ")
    button = driver.find_element_by_id("reset")
    button.click()

    # ``assertIn("", text)`` holds for every string, so the old check could
    # never fail. Check the field's current value instead.
    # NOTE(review): assumes ``id_input`` is a form field -- confirm.
    value = driver.find_element_by_id("id_input").get_attribute("value")
    self.assertFalse(value)
def __init__(self, firefox_path=None, gecko_path=None):
    """Start a headless Firefox and prepare the xiami.com URLs and patterns.

    :param firefox_path: path handed to ``FirefoxBinary``
    :param gecko_path: geckodriver path; when ``None`` the driver's own
        default is used
    """
    binary = FirefoxBinary(firefox_path)
    opts = FirefoxOptions()
    opts.add_argument("--headless")

    driver_kwargs = {"options": opts, "firefox_binary": binary}
    if gecko_path is not None:
        # Forward the path only when one was given. An explicit
        # ``executable_path=None`` would replace the keyword's default.
        driver_kwargs["executable_path"] = gecko_path
    self.driver = webdriver.Firefox(**driver_kwargs)

    self.url = "https://www.xiami.com/"
    self.collect_url = "https://www.xiami.com/collect/"
    # Each pattern captures the id part of a collect / song link.
    self.collect_pattern = re.compile('<a href="/collect/([^"]+?)">')
    self.song_pattern = re.compile('<a href="/song/([^"]+?)">')
def get_aws_credentials(email, pw):
    """Drive a headless Firefox through the AWS account pages and read the
    credentials text.

    NOTE(review): this function is truncated in the source. Everything
    between the commented-out ``webdriver.Remote`` call and
    ``aws_account.click()`` is missing (a redacted URL swallowed it), so
    ``aws_account`` is never assigned in the visible code and ``email`` /
    ``pw`` are not used anywhere that can be seen here.

    :param email: not used in the visible part of the function
    :param pw: not used in the visible part of the function
    :return: tuple ``(driver, text)`` where ``text`` is the text of a
        ``span`` element containing ``[default]``
    """
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = Firefox(firefox_options=opts)
    # driver = webdriver.Remote(
    # command_executor='http://*****:*****@class='hdNavTop']")[4]
    aws_account.click()
    time.sleep(20)
    # The first two <strong> elements inside "uiOutputRichText" are read as
    # the credit amount and the end time.
    credit = driver.find_element_by_class_name(
        "uiOutputRichText").find_elements_by_tag_name("strong")[0].text
    endtime = driver.find_element_by_class_name(
        "uiOutputRichText").find_elements_by_tag_name("strong")[1].text
    print(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\t" +
        "Credits: 【" + credit + "】,End Time: 【" + endtime + "】")
    account = driver.find_element_by_class_name("btn")
    account.click()
    currentTab = driver.current_window_handle
    print(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\t" +
        "Go Vocareum...")
    time.sleep(20)
    # Switch to the second window handle (presumably the tab opened by the
    # click above -- TODO confirm).
    vocareum_tab = driver.window_handles[1]
    driver.switch_to.window(vocareum_tab)
    account_details = driver.find_element_by_id("showawsdetail")
    account_details.click()
    time.sleep(10)
    print(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\t" +
        "Get Credentail...")
    show_key_btn = driver.find_element_by_id("clikeyboxbtn")
    show_key_btn.click()
    time.sleep(5)
    span_tags = driver.find_elements_by_tag_name("span")
    # Keep the text of a span containing '[default]'.
    # NOTE(review): ``text`` stays unassigned when no span matches.
    for tag in span_tags:
        if '[default]' in tag.text:
            text = tag.text
    return driver, text
def create_engagement(username, password, engagement): try : from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import Select from selenium.webdriver import FirefoxOptions import datetime import time import os import re import random import string from relay42_libmodules import * except ImportError: print ("failed to load selenium module for create_engagement") exit() try : opts = FirefoxOptions() opts.add_argument("--headless") driver = webdriver.Firefox(firefox_options=opts) driver.get("https://admin.relay42.com") time.sleep(2) driver.find_element_by_name("username").send_keys(username) driver.find_element_by_name("password").send_keys(password) driver.find_element_by_xpath('//button[@type="submit"]').click() time.sleep(5) driver.get("https://admin.relay42.com/site-1233/profiles/engagements/add") time.sleep(5) driver.find_element_by_xpath('//div[@class="controls input-container"]//input[@id="data-interaction-mainFieldValue"]').click() driver.find_element_by_xpath('//div[@class="controls input-container"]//input[@id="data-interaction-mainFieldValue"]').send_keys(engagement) time.sleep(5) descrption = engagement + "Am I a fruit or a phone" driver.find_element_by_xpath('//div[@class="controls input-container"]//textarea[@id="data-interaction-description"]').click() driver.find_element_by_xpath('//div[@class="controls input-container"]//textarea[@id="data-interaction-description"]').send_keys(descrption) driver.find_element_by_xpath('//button[@type="submit"]').click() time.sleep(5) reference_engage_link= driver.current_url time.sleep(5) driver.quit() report_status(reference_engage_link) time.sleep(5) reportstring ="successfully created engagement "+engagement report_status(reportstring) teststatus = "testcase_createEngagement:PASS" report_status(teststatus) return reference_engage_link except : reportstring ="something went wrong with engagement creation" report_status(reportstring) teststatus = 
"testcase_createEngagement:FAIL" report_status(teststatus) exit()
def test_wrong_url_submit(self):
    """Submitting a malformed url must show the "Wrong url" error."""
    opts = FirefoxOptions()
    opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    # Close the browser after the test, whether it passes or fails.
    self.addCleanup(driver.quit)

    driver.get("http://127.0.0.1:8000/verificacion/")
    element = driver.find_element_by_id("id_input")
    element.send_keys("badurl")
    button = driver.find_element_by_id("submit")
    button.click()
    element = driver.find_element_by_id("error")
    self.assertEqual(element.text, "Wrong url")