def extract_1page_rev(review_link):
    """Scrape the reviews shown on a single review page.

    Opens ``review_link`` in a headless Firefox with image loading disabled,
    clicks the first "more" expander if one is present, and collects the
    text, rating and date of every review inside the review-list container.

    Args:
        review_link: URL of the page to load.

    Returns:
        A ``(reviews, ratings, dates)`` tuple of three equally long lists:
        the review texts, the trailing ``_``-separated token of the last CSS
        class of each rating element, and the ``title`` attribute of each
        date element.

    Raises:
        AssertionError: If the page yields different numbers of reviews,
            ratings and dates.
    """
    opts = Options()
    # Same effect as the deprecated ``set_headless()`` helper.
    opts.add_argument("-headless")
    opts.set_preference("permissions.default.image", 2)  # do not load images
    assert opts.headless  # Operating in headless mode

    now = time.time()
    browser = webdriver.Firefox(options=opts)
    try:
        browser.implicitly_wait(10)
        browser.get(review_link)
        review_zone = browser.find_element_by_id(
            "taplc_location_reviews_list_resp_hr_resp_0")

        # Find the first "more" button and expand the truncated text.
        more_buttons = review_zone.find_elements_by_css_selector(
            "div.prw_rup.prw_reviews_text_summary_hsx > div > p > span")
        if more_buttons:
            more_buttons[0].click()
            time.sleep(.1)

        revs = review_zone.find_elements_by_css_selector(
            "div.ui_column.is-9 > div.prw_rup.prw_reviews_text_summary_hsx > div > p")
        dats = review_zone.find_elements_by_css_selector(
            "div.ui_column.is-9 > span.ratingDate")
        rats = review_zone.find_elements_by_css_selector(
            "div.ui_column.is-9 > span.ui_bubble_rating")
        assert len(dats) == len(rats) == len(revs)

        reviews = []
        ratings = []
        dates = []
        for rev, rat, dat in zip(revs, rats, dats):
            reviews.append(rev.text)
            ratings.append(rat.get_attribute("class").split()[-1].split("_")[-1])
            dates.append(dat.get_attribute("title"))
    finally:
        # Always release the browser, even when a lookup or the assertion fails.
        browser.quit()

    print("Streaming %d reviews took %.4f (seconds)" % (len(reviews), time.time()-now))
    return reviews, ratings, dates
def launch(self):
    """Start Firefox in mobile view and open ``self.url``.

    Builds Firefox options with a mobile user agent, enables headless mode
    when ``self.headless`` is truthy, stores the driver on ``self.driver``
    and a 100-second ``WebDriverWait`` on ``self.wait``, then loads the page.
    """
    # Custom user agent for fetching the mobile version of facebook.
    # Adjacent literals are used instead of backslash continuations inside
    # the string, which leaked stray whitespace into the header value.
    userAgent = (
        "Mozilla/5.0 (Linux; Android 4.2.1; en-us; "
        "Nexus 5 Build/JOP40D) AppleWebKit/535.19 ("
        "KHTML, like Gecko) Chrome/18.0.1025.166 "
        "Mobile Safari/535.19"
    )

    # Set options for mobile view.
    options = Options()
    options.set_capability("deviceName", "iPhone")
    options.set_preference("general.useragent.override", userAgent)

    # A visible window is only opened for debugging.
    if self.headless:
        self.msg("headless mode")
        options.add_argument('-headless')

    # Launch the browser.
    # TODO: Add support for other browsers
    self.driver = Firefox(
        executable_path=self.driverPath, options=options)
    self.wait = WebDriverWait(self.driver, timeout=100)
    self.msg("initiated")

    self.driver.get(self.url)
    self.msg("opening site "+str(self.url))
def sign_in(values):
    """Log in, open one video and apply the requested actions to it.

    Args:
        values: A 3-item sequence ``(video_id, need_subscribe, evaluate_way)``.
            ``evaluate_way`` triggers ``evaluate_video`` only when it is one
            of ``"dislike"``, ``"like"`` or ``"dismiss_all"``;
            ``need_subscribe`` is passed through to ``subscribe_unsubscribe``.

    Prints the elapsed time in seconds once the browser has been closed.
    """
    start = time.time()
    # Unpack first so a malformed ``values`` fails before a browser is started.
    video_id_, need_subscribe, evaluate_way = values

    options = Options()
    options.headless = True
    options.set_preference("media.volume_scale", "0.0")
    browser = webdriver.Firefox(options=options, executable_path=geckodriver_exe_file_path)
    try:
        # SEND LOGIN AND PASSWORD FOR YOUTUBE
        browser.get((
            'https://accounts.google.com/signin/v2/identifier?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26hl%3Den%26app%3Ddesktop%26action_handle_signin%3Dtrue&hl=en&uilel=3&service=youtube&flowName=GlifWebSignIn&flowEntry=ServiceLogin&cid=1&navigationDirection=forward'
        ))
        send_login(browser, username)
        send_password(browser, password)

        browser.get("https://www.youtube.com/watch?v={}".format(video_id_))
        sleep(1)  # 5
        status(browser)
        browser.implicitly_wait(4)
        if evaluate_way in ["dislike", "like", "dismiss_all"]:
            evaluate_video(browser, evaluate_way)
        subscribe_unsubscribe(browser, need_subscribe)
        status(browser)
    finally:
        # Release the browser even if one of the steps above raised.
        browser.quit()

    end = time.time()
    print(end - start)
    sleep(1)
def test_accepts_w3c_sauce_options_capabilities(self):
    """SauceOptions accepts Sauce and W3C settings as keyword arguments."""
    firefox_opts = FirefoxOptions()
    firefox_opts.add_argument('--foo')
    firefox_opts.set_preference('foo', 'bar')

    # Sauce-specific settings first, then the W3C ones, merged into one dict.
    settings = {'maxDuration': 1, 'commandTimeout': 2}
    settings.update({'acceptInsecureCerts': True, 'pageLoadStrategy': 'eager'})

    sauce = SauceOptions(seleniumOptions=firefox_opts, **settings)

    assert sauce.browser_name == 'firefox'
    assert sauce.accept_insecure_certs is True
    assert sauce.page_load_strategy == 'eager'
    assert sauce.max_duration == 1
    assert sauce.command_timeout == 2

    expected_firefox = {'args': ['--foo'], 'prefs': {'foo': 'bar'}}
    assert sauce.selenium_options['moz:firefoxOptions'] == expected_firefox
def test_to_capabilities(self):
    """Options.to_capabilities() reflects profile, args, binary and prefs."""
    key = "moz:firefoxOptions"
    opts = Options()
    assert opts.to_capabilities() == {}

    # Profile is serialised as an encoded string.
    profile = FirefoxProfile()
    opts.profile = profile
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "profile" in moz
    assert isinstance(moz["profile"], basestring)
    assert moz["profile"] == profile.encoded

    # Command-line arguments are passed through as a list.
    opts.add_argument("--foo")
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "args" in moz
    assert moz["args"] == ["--foo"]

    # Binary is reported as its start command.
    binary = FirefoxBinary()
    opts.binary = binary
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "binary" in moz
    assert isinstance(moz["binary"], basestring)
    assert moz["binary"] == binary._start_cmd

    # Preferences are exposed as a dict.
    opts.set_preference("spam", "ham")
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "prefs" in moz
    assert isinstance(moz["prefs"], dict)
    assert moz["prefs"]["spam"] == "ham"
def DoCo(co):
    """Process one company record with a headless Firefox.

    Example: ``co = companies[18]; co = DoCo(co)``

    Sets ``timeOut``, ``timeStart`` and ``newsPages`` on ``co``, starts a
    browser, then runs ``GetEnglishHTML``, ``GetCompanyWebsiteGraph`` and
    ``FindNewsPage``. Failures are logged under ``co['coWebsite']`` rather
    than raised.

    Args:
        co: Mapping describing the company; must contain ``'coWebsite'``.

    Returns:
        The (possibly replaced) company mapping with ``timeStop`` set. When
        the browser cannot be started, ``timeStop`` equals ``timeStart``.
    """
    co['timeOut'] = 10 * 60
    co['timeStart'] = datetime.now().timestamp()
    co['newsPages'] = {}

    br = None
    try:
        options = Options()
        options.headless = True
        options.set_preference("security.sandbox.content.level", 5)
        br = Firefox(options=options)
        br.set_page_load_timeout(60)
    except Exception:
        log.exception(co['coWebsite'])
        if br is not None:
            # The browser started but could not be configured: do not leak it.
            br.quit()
        co['timeStop'] = co['timeStart']
        return co

    try:
        co = GetEnglishHTML(co, br)
        gr = GetCompanyWebsiteGraph(co, br)
        co = FindNewsPage(co, gr)
    except Exception:
        # Best effort: record the failure and still return the record.
        log.exception(co['coWebsite'])
    finally:
        br.quit()

    co['timeStop'] = datetime.now().timestamp()
    return co
def test_capabilities_for_selenium(self):
    """to_capabilities() merges defaults, Firefox options and Sauce options."""
    firefox_opts = FirefoxOptions()
    firefox_opts.add_argument('--foo')
    firefox_opts.set_preference('foo', 'bar')

    sauce = SauceOptions(seleniumOptions=firefox_opts)
    sauce.build = 'Sample Build Name'

    firefox_section = {'args': ['--foo'], 'prefs': {'foo': 'bar'}}
    sauce_section = {'build': 'Sample Build Name'}
    expected = {
        'acceptInsecureCerts': True,
        'browserName': 'firefox',
        'browserVersion': 'latest',
        'platformName': 'Windows 10',
        'marionette': True,
        'moz:firefoxOptions': firefox_section,
        'sauce:options': sauce_section,
    }

    assert sauce.to_capabilities() == expected
def test_to_capabilities(self):
    """Options.to_capabilities() starts from defaults and adds each setting."""
    key = "moz:firefoxOptions"
    opts = Options()

    # With nothing set, only the default Firefox capabilities are reported.
    expected_defaults = DesiredCapabilities.FIREFOX.copy()
    expected_defaults["pageLoadStrategy"] = "normal"
    assert opts.to_capabilities() == expected_defaults

    # Profile is serialised as an encoded string.
    profile = FirefoxProfile()
    opts.profile = profile
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "profile" in moz
    assert isinstance(moz["profile"], str)
    assert moz["profile"] == profile.encoded

    # Command-line arguments are passed through as a list.
    opts.add_argument("--foo")
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "args" in moz
    assert moz["args"] == ["--foo"]

    # Binary is reported as its start command.
    binary = FirefoxBinary()
    opts.binary = binary
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "binary" in moz
    assert isinstance(moz["binary"], str)
    assert moz["binary"] == binary._start_cmd

    # Preferences are exposed as a dict.
    opts.set_preference("spam", "ham")
    caps = opts.to_capabilities()
    assert key in caps
    moz = caps[key]
    assert "prefs" in moz
    assert isinstance(moz["prefs"], dict)
    assert moz["prefs"]["spam"] == "ham"
class FirefoxHeadlessDriver(BaseDriver):
    """Headless Firefox driver that skips stylesheets and images."""

    def __init__(self):
        """Start headless Firefox and apply the page/script timeouts."""
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.set_preference("permissions.default.stylesheet", 2)
        self.options.set_preference("permissions.default.image", 2)
        # ``options`` replaces the deprecated ``firefox_options`` keyword.
        self.driver = webdriver.Firefox(options=self.options,
                                        executable_path=FIREFOX_EXE_PATH)
        self.driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        self.driver.set_script_timeout(SCRIPT_LOAD_TIMEOUT)

    def get_html(self, url):
        """Load ``url`` and return the page source encoded as UTF-8.

        On a page-load timeout the partially loaded source is still
        returned; on any other error an empty string is returned.
        """
        try:
            self.driver.get(url)
        except TimeoutException as e:
            # Single-argument print() behaves the same on Python 2 and 3;
            # fall back to the exception itself where ``message`` is absent.
            print('timeout: %s' % getattr(e, 'message', e))
        except Exception as e:
            print(getattr(e, 'message', e))
            return ''
        return self.driver.page_source.encode('utf-8')

    def setting(self, settings={}):
        """Hook for driver settings; does nothing (``settings`` is unused)."""
        pass
def __init__(self, debug=False):
    """Start Firefox with private browsing and a 10-second wait helper.

    Args:
        debug: When falsy the browser runs headless; when truthy the
            headless flag is left untouched.
    """
    firefox_opts = Options()
    firefox_opts.set_preference("browser.privatebrowsing.autostart", True)
    run_headless = not debug
    if run_headless:
        firefox_opts.headless = True

    browser = webdriver.Firefox(options=firefox_opts)
    self.driver = browser
    self.wait = WebDriverWait(browser, 10)
def session_create(config):
    """Create a Firefox session configured from ``config``.

    Args:
        config: Mapping of settings. ``'headless'`` set to exactly ``True``
            adds the ``--headless`` argument; ``'webdriver_enabled'`` set to
            exactly ``False`` sets ``dom.webdriver.enabled`` to ``False`` on
            the options.

    Returns:
        The started ``webdriver.Firefox`` instance.
    """
    log.info("Creating session")
    options = Options()
    if config.get('headless', False) is True:
        log.info("Headless mode")
        options.add_argument("--headless")
    if config.get('webdriver_enabled') is False:
        options.set_preference("dom.webdriver.enabled", False)

    # The profile sets these two preferences regardless of ``config``.
    profile = FirefoxProfile()
    profile.set_preference("dom.webdriver.enabled", False)
    profile.set_preference('useAutomationExtension', False)
    profile.update_preferences()

    # Copy the capability template: it is a shared class-level dict and the
    # driver may update the mapping it is given.
    desired = DesiredCapabilities.FIREFOX.copy()
    driver = webdriver.Firefox(options=options, firefox_profile=profile,
                               desired_capabilities=desired)
    log.info("New session is: %s %s", driver.session_id,
             driver.command_executor._url)
    return driver
def __init__(self, executable_path='geckodriver'):
    """Open the sign-in page, wait for a manual login, then click "Admit".

    Args:
        executable_path: Path handed to ``webdriver.Firefox`` as
            ``executable_path``.
    """
    # Firefox options to download pdfs without asking.
    # (Download-folder preferences are intentionally left unset here.)
    firefox_opts = Options()
    firefox_opts.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                "application/pdf;application/zip")
    firefox_opts.set_preference("pdfjs.disabled", True)
    self.selenium = webdriver.Firefox(executable_path=executable_path,
                                      options=firefox_opts)

    # Go to the sign-in page.
    self.selenium.get(
        'https://admit.applyweb.com/admit/shibboleth/northwestern')

    # Wait for a human to log in.
    login_wait = WebDriverWait(self.selenium, 99999)
    login_wait.until(expected_conditions.url_contains(
        'https://admit.applyweb.com/admit/gwt'))

    # Click the "admit" button once the buttons are present.
    button_locator = (By.CSS_SELECTOR, "td.button")
    button_wait = WebDriverWait(self.selenium, 20)
    button_wait.until(
        expected_conditions.presence_of_element_located(button_locator))
    for candidate in self.selenium.find_elements_by_css_selector('td.button'):
        if candidate.text != 'Admit':
            continue
        candidate.click()

    # Wait for contents to render.
    cell_locator = (By.ID, "isc_PoolTreeWindow_3_0_valueCell0")
    render_wait = WebDriverWait(self.selenium, 1)
    render_wait.until(
        expected_conditions.text_to_be_present_in_element(cell_locator, 'MS'))
def collect_ar_company_list():
    """Collect company names and links from the exchange listing pages.

    Visits the listing page for ``exch`` values 1 through 9 and reads the
    first cell of every row in the first ``//table/tbody`` found. Pages whose
    table does not appear within 30 seconds are reported and skipped.

    Returns:
        A ``pandas.DataFrame`` with the columns ``CompanyNameAr`` and
        ``UrlAr``, one row per table row found.
    """
    options = Options()
    options.headless = True
    options.set_preference("security.sandbox.content.level", 5)
    browser = webdriver.Firefox(options=options)

    company_list = []
    try:
        for i in range(1, 10):
            annual_reports_webpage = 'http://www.xxxxxxxxxxx.com/Companies?exch=%s' % i
            print(annual_reports_webpage)
            browser.get(annual_reports_webpage)
            try:
                tbody = WebDriverWait(browser, 30).until(
                    EC.presence_of_element_located((By.XPATH, "//table/tbody")))
            except Exception as e:
                print(annual_reports_webpage, e)
                # Without a table there is nothing to read; moving on avoids
                # reusing an unbound or stale ``tbody`` from an earlier page.
                continue

            for tr in tbody.find_elements_by_xpath(".//tr"):
                td = tr.find_elements_by_xpath(".//td")
                row = {
                    'CompanyNameAr': td[0].text,
                    'UrlAr': td[0].find_element_by_xpath(".//a").get_attribute('href'),
                }
                company_list.append(row)
    finally:
        # Release the browser whether or not scraping succeeded.
        browser.quit()

    return pd.DataFrame(company_list)
def __init__(self, username, password):
    """Store the credentials and open facebook.com in Firefox.

    Args:
        username: Saved on ``self.username``.
        password: Saved on ``self.password``.
    """
    self.username = username
    self.password = password

    options = Options()
    options.set_preference("dom.webnotifications.enabled", False)
    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    self.browser = webdriver.Firefox(options=options)
    self.browser.get('http://facebook.com')
def __init__(self, headless=False):
    """Start Firefox with a fresh profile.

    Args:
        headless: Stored on ``self.headless`` and applied to the options.
    """
    self.headless = headless

    firefox_opts = Options()
    firefox_opts.set_preference("browser.link.open_newwindow", 1)
    fresh_profile = webdriver.FirefoxProfile()
    firefox_opts.headless = headless

    self.driver = webdriver.Firefox(firefox_profile=fresh_profile,
                                    options=firefox_opts)
def build_browser():
    """Return a headless Firefox driver with web notifications disabled."""
    logging.info('Starting browser driver')

    firefox_opts = Options()
    firefox_opts.set_preference('dom.webnotifications.enabled', False)
    firefox_opts.headless = True

    return webdriver.Firefox(options=firefox_opts)
def __init__(self, headless):
    """Start Firefox from a fixed binary and geckodriver location.

    Args:
        headless: Value assigned to the options' ``headless`` flag.
    """
    options = Options()
    options.headless = headless
    options.set_preference('devtools.jsonview.enabled', False)
    options.binary_location = "C:/firefox_binary/firefox.exe"
    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    self.__driver = webdriver.Firefox(
        options=options,
        executable_path='./driver/geckodriver.exe')
def setup_driver():
    """Create the Firefox driver and publish it as the module global ``wd``.

    The binary and geckodriver paths are placeholders that must be edited.
    """
    global wd

    firefox_opts = Options()
    firefox_opts.set_preference("media.navigator.permission.disabled", True)
    firefox_opts.add_argument('--disable-dev-shm-usage')
    firefox_opts.binary_location = r"PATH TO YOUR FIREFOX EXECUTABLE"

    wd = webdriver.Firefox(options=firefox_opts,
                           executable_path="PATH TO YOUR GECKODRIVER")
def test_accepts_selenium_browser_options_instance(self):
    """SauceOptions derives the browser and settings from FirefoxOptions."""
    firefox_opts = FirefoxOptions()
    firefox_opts.add_argument('--foo')
    firefox_opts.set_preference('foo', 'bar')

    sauce = SauceOptions(seleniumOptions=firefox_opts)

    assert sauce.browser_name == 'firefox'
    expected_firefox = {'args': ['--foo'], 'prefs': {'foo': 'bar'}}
    assert sauce.selenium_options['moz:firefoxOptions'] == expected_firefox
def driver(self):
    """Return the Firefox driver, creating and caching it on first use.

    A new driver is built only when ``self._driver`` is missing or falsy.
    It runs headless unless ``self.show_browser`` is truthy.
    """
    already_created = getattr(self, "_driver", None)
    if already_created:
        return self._driver

    firefox_opts = Options()
    if not self.show_browser:
        firefox_opts.headless = True
    if self.profile_path:
        # NOTE(review): this sets a preference literally named 'profile';
        # confirm that is what makes Firefox use ``profile_path``.
        firefox_opts.set_preference('profile', self.profile_path)

    self._driver = webdriver.Firefox(options=firefox_opts)
    return self._driver
def _default_browser():
    """Start an invisible 800x600 display and return a headless Firefox.

    The driver service uses ``Constants.FIREFOX_BIN_PATH`` and logs to
    ``Constants.LOG_FILE_PATH``. The display is started here and not stopped.
    """
    virtual_display = Display(visible=False, size=(800, 600))
    virtual_display.start()

    opts = Options()
    opts.headless = True
    opts.set_preference("gfx.webrender.all", True)

    driver_service = Service(Constants.FIREFOX_BIN_PATH,
                             log_path=Constants.LOG_FILE_PATH)
    browser = webdriver.Firefox(service=driver_service, options=opts)
    return browser
class SessionHandler():
    """Drive a Firefox session that logs into a bank and downloads data."""

    def __init__(self, bank_name):
        """Select the bank implementation for ``bank_name``.

        Raises:
            Exception: If ``bank_name`` is not in ``SUPPORTED_BANKS``.
        """
        if (bank_name not in SUPPORTED_BANKS):
            raise Exception(
                f'The bank {bank_name} is currently not supported.')
        self.bank_name = bank_name
        if (bank_name == 'TD'):
            self.bank = TD()
        elif (bank_name == 'AMEX'):
            self.bank = AMEX()

    def set_browser_options(self):
        """Build ``self.options`` with the download-related preferences.

        The download directory is the ``data`` folder next to this file.
        """
        self.options = Options()
        self.options.set_preference("browser.download.folderList", 2)
        self.options.set_preference(
            "browser.download.manager.showWhenStarting", False)
        self.options.set_preference(
            "browser.download.dir",
            os.path.join(os.path.dirname(os.path.realpath(__file__)), "data"))
        self.options.set_preference("browser.download.useDownloadDir", True)
        self.options.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "text/csv,text/x-csv,application/csv,application/x-csv,text/plain,text/comma-separated-values,text/x-comma-separated-values,application/octet-stream,application/vnd.ms-excel,text/tab-separated-values"
        )

    def start_session(self):
        """Configure the options and start Firefox on ``self.driver``."""
        self.set_browser_options()
        # ``options`` replaces the deprecated ``firefox_options`` keyword.
        self.driver = webdriver.Firefox(options=self.options)
        print("Session Started")

    def close_session(self):
        """End the browser session."""
        # quit() also shuts down the driver service; close() only closes
        # the current window and leaves the service process behind.
        self.driver.quit()
        print("Session Closed")

    def establish_session(self,
                          user_name=None,
                          password=None,
                          service_name=None):
        """Log in through the selected bank; it returns the driver to keep."""
        self.driver = self.bank.login(self.driver,
                                      selection_params=service_name,
                                      user_name=user_name,
                                      password=password)

    def retrieve_transactions_for(self,
                                  key_word=None,
                                  cycle_to_retrieve=None,
                                  format_to_retrieve="CSV"):
        """Navigate to downloads, pick the cycle, then close the session.

        Args:
            key_word: Passed to the bank as ``account_selector``.
            cycle_to_retrieve: Passed to the bank as ``cycle_index``.
            format_to_retrieve: Passed to the bank as ``data_format``.
        """
        self.driver = self.bank.navigate_to_downloads(
            self.driver,
            account_selector=key_word,
            data_format=format_to_retrieve)
        self.driver = self.bank.select_cycle_to_download(
            self.driver, cycle_index=cycle_to_retrieve)
        self.close_session()
def get_web_driver():
    """Return a headless Firefox driver with a 10-second implicit wait.

    Two cookie preferences are set and the driver log is written to
    ``/tmp/geckodriver.log``.
    """
    opts = Options()
    # Same effect as the deprecated ``set_headless(True)`` helper.
    opts.add_argument("-headless")
    opts.set_preference("network.cookie.alwaysAcceptSessionCookies", True)
    opts.set_preference("network.cookie.cookieBehavior", 1)
    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    browser = webdriver.Firefox(options=opts,
                                log_path='/tmp/geckodriver.log')
    browser.implicitly_wait(10)
    return browser
def firefox_options(request):
    """Build Firefox options from the markers on the requesting test node.

    Arguments come from ``get_arguments_from_markers`` and preferences from
    ``get_preferences_from_markers``, both called with ``request.node``.
    """
    firefox_opts = Options()

    marker_arguments = get_arguments_from_markers(request.node)
    for argument in marker_arguments:
        firefox_opts.add_argument(argument)

    marker_preferences = get_preferences_from_markers(request.node)
    for pref_name, pref_value in marker_preferences.items():
        firefox_opts.set_preference(pref_name, pref_value)

    return firefox_opts
def FireFox(self):
    """Return a headless Firefox driver with image loading disabled.

    Uses ``./geckodriver.exe`` as the driver executable.
    """
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    options = Options()
    options.add_argument('-headless')
    # NOTE(review): the next flag and --window-size look like Chrome-style
    # switches; confirm Firefox honours them.
    options.add_argument('--disable-gpu')  # disable GPU acceleration
    options.set_preference('permissions.default.image', 2)  # do not load images
    options.add_argument('--window-size=1280,800')  # set the window size
    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    browser = webdriver.Firefox(executable_path='./geckodriver.exe',
                                options=options)
    return browser
def test_capabilities_for_selenium(self):
    """Firefox args and prefs appear under 'moz:firefoxOptions'."""
    firefox_opts = FirefoxOptions()
    firefox_opts.add_argument('--foo')
    firefox_opts.set_preference('foo', 'bar')

    capabilities = SauceOptions(seleniumOptions=firefox_opts).to_capabilities()

    assert capabilities['moz:firefoxOptions']['args'] == ['--foo']
    assert capabilities['moz:firefoxOptions']['prefs'] == {'foo': 'bar'}
def site_desktop(url):
    """Open ``url`` in a headless 1920x2200 Firefox and return the driver.

    The caller owns the returned driver and must quit it. If sizing the
    window or loading the page fails, the driver is quit here and the
    error is re-raised.

    Args:
        url: Address to load.
    """
    opts = Options()
    # NOTE(review): the first value is the string 'true', not a boolean;
    # confirm that is the intended preference type.
    opts.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'true')
    opts.set_preference('plugin.state.flash', 2)
    # Same effect as the deprecated ``set_headless(True)`` helper.
    opts.add_argument('-headless')

    driver = webdriver.Firefox(executable_path=gekodriver_firefox, options=opts)
    try:
        driver.set_window_size(1920, 2200)
        driver.get(url)
    except Exception:
        # The driver would otherwise be unreachable and left running.
        driver.quit()
        raise
    return driver
def __init__(self):
    """Start a headless Firefox that saves CSV downloads to the current directory."""
    firefox_opts = Options()
    firefox_opts.headless = True

    download_prefs = {
        "browser.download.folderList": 2,
        "browser.download.manager.showWhenStarting": False,
        "browser.download.dir": os.getcwd(),
        "browser.helperApps.neverAsk.saveToDisk": "text/csv",
    }
    for pref_name, pref_value in download_prefs.items():
        firefox_opts.set_preference(pref_name, pref_value)

    self.browser = webdriver.Firefox(options=firefox_opts)
def getDriver():
    """Return a headless Firefox driver that has already loaded ``enlace``.

    Driver logging is set to ``trace`` and the page-load timeout to 60
    seconds. The caller owns the returned driver. If the initial page load
    fails, the driver is quit here and the error is re-raised.
    """
    options = Options()
    options.log.level = "trace"
    options.add_argument("--headless")
    options.set_preference("browser.download.manager.showWhenStarting", False)
    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

    driver = webdriver.Firefox(options=options)
    try:
        # Pass a number rather than the string "60".
        driver.set_page_load_timeout(60)
        driver.get(enlace)
    except Exception:
        # The driver would otherwise be unreachable and left running.
        driver.quit()
        raise
    return driver
def _parse_performance(cell):
    """Return the integer in ``cell``'s link text, or 0 if missing or not numeric."""
    try:
        return int(cell.a.text.strip())
    except (AttributeError, ValueError):
        return 0


def scrape_damage_parse_data(wcl_string, fight_id):
    """Scrape per-player performance values for one fight of a report.

    Loads the damage-done view of the report in a headless Firefox, then
    parses the page source with BeautifulSoup.

    Args:
        wcl_string: Report identifier inserted into the report URL.
        fight_id: Fight number inserted into the URL fragment.

    Returns:
        A dict mapping player name to
        ``{'overall-performance': int, 'ilvl-performance': int}``. Rows
        lacking any of the three expected cells, rows named ``"Hati"`` and
        rows whose icon names a spec in the ignore set are skipped.
        Unparsable performance values are recorded as 0.
    """
    ignore_specs = {'Monk-Mistweaver', 'Paladin-Holy', 'Druid-Restoration',
                    'Priest-Discipline', 'Priest-Holy', 'Shaman-Restoration'}

    options = Options()
    options.add_argument('--headless')
    options.set_preference("dom.max_script_run_time", 5)
    options.set_preference("http.response.timeout", 5)
    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    driver = webdriver.Firefox(options=options)
    try:
        driver.set_page_load_timeout(10)
        try:
            driver.get('https://www.warcraftlogs.com/reports/'+wcl_string+'#fight='+str(fight_id)+'&type=damage-done')
        except Exception:
            # Deliberate best effort: whatever has loaded so far (for
            # example after a page-load timeout) is still scraped below.
            pass
        all_tablerows_selector = '//div[@id="table-container"]/div/table/tbody/tr'
        trs = driver.find_elements_by_xpath(all_tablerows_selector)
        tablerows = len(trs)
        print(tablerows)
        html = driver.page_source
    finally:
        # Release the browser even if reading the page failed.
        driver.quit()

    soup = BeautifulSoup(html, "lxml")
    single_fight_parse_scrape_data = dict()
    for tablerow in soup.find_all(id=re.compile('main-table-row')):
        performance_cell = tablerow.find(class_='main-table-performance')
        link_cell = tablerow.find(class_='main-table-link')
        ilvl_cell = tablerow.find(class_='main-table-ilvl-performance')
        if performance_cell is None or link_cell is None or ilvl_cell is None:
            continue

        player_name = link_cell.a.string.strip()
        if player_name == "Hati":
            continue

        # The spec name is the part of the icon path between "icons/" and
        # the last dot; rows without a usable icon are not filtered.
        icon = tablerow.img
        icon_src = icon.get('src') if icon is not None else None
        if icon_src:
            spec_match = re.search(r'icons/.*\.', icon_src)
            if spec_match and spec_match.group()[6:-1] in ignore_specs:
                continue

        single_fight_parse_scrape_data[player_name] = {
            'overall-performance': _parse_performance(performance_cell),
            'ilvl-performance': _parse_performance(ilvl_cell),
        }
    return single_fight_parse_scrape_data
def firefox_options(request, firefox_path, firefox_profile):
    """Build Firefox options from fixtures and the test node's markers.

    Args:
        request: Object whose ``node`` provides the ``firefox_arguments``
            and ``firefox_preferences`` markers.
        firefox_path: Binary path wrapped in ``FirefoxBinary``, or ``None``.
        firefox_profile: Profile assigned to the options, or ``None``.

    Returns:
        The populated ``Options`` instance.
    """
    opts = Options()

    if firefox_profile is not None:
        opts.profile = firefox_profile
    if firefox_path is not None:
        opts.binary = FirefoxBinary(firefox_path)

    # Each positional argument of the marker becomes a command-line argument.
    argument_marker = request.node.get_marker('firefox_arguments')
    marker_arguments = () if argument_marker is None else argument_marker.args
    for argument in marker_arguments:
        opts.add_argument(argument)

    # The first positional argument of the marker is a name -> value mapping.
    preference_marker = request.node.get_marker('firefox_preferences')
    if preference_marker is not None:
        preference_map = preference_marker.args[0]
        for pref_name, pref_value in preference_map.items():
            opts.set_preference(pref_name, pref_value)

    return opts
def test_prefs(self):
    """set_preference adds new keys and overwrites existing ones."""
    opts = Options()
    assert len(opts.preferences) == 0
    assert isinstance(opts.preferences, dict)

    # (name, value, expected number of preferences afterwards)
    steps = (
        ("spam", "ham", 1),
        ("eggs", True, 2),
        ("spam", "spam", 2),  # same key again: overwritten, count unchanged
    )
    for pref_name, pref_value, expected_count in steps:
        opts.set_preference(pref_name, pref_value)
        assert len(opts.preferences) == expected_count

    assert opts.preferences == {"spam": "spam", "eggs": True}
def driver_kwargs(request, driver_kwargs):
    """Add Firefox options with two startup-page preferences to ``driver_kwargs``.

    The incoming mapping is updated in place under the ``'firefox_options'``
    key and returned.
    """
    startup_prefs = (
        ('browser.startup.homepage_override.mstone', ''),
        ('startup.homepage_welcome_url', 'about:'),
    )
    firefox_opts = Options()
    for pref_name, pref_value in startup_prefs:
        firefox_opts.set_preference(pref_name, pref_value)

    driver_kwargs['firefox_options'] = firefox_opts
    return driver_kwargs