def __init_browser(self):
    """Configure and start the headless Firefox web driver.

    Builds the Firefox options and profile, shuts down any browser left over
    from an earlier call, starts a new 1920x1080 browser and creates the
    ``WebDriverWait`` that uses the configured timeout.
    """
    self.__options = webdriver.FirefoxOptions()
    self.__options.headless = True
    self.__options.accept_insecure_certs = True
    self.__geckodriver_binary = self.__args.geckodriver
    self.__firefox_binary = FirefoxBinary(self.__args.firefox)

    # Set firefox profile
    self.__profile = webdriver.FirefoxProfile()
    firefox_profile(self.__profile)

    if self.__browser is not None:
        # quit() ends the whole session; close() only closes the current
        # window and can leave the driver process behind.
        self.__browser.quit()

    # Console mode discards the driver log; otherwise it goes to the
    # configured log file. Everything else is identical in both modes.
    log_path = os.path.devnull if self.__args.console else self.__args.log
    self.__browser = webdriver.Firefox(
        options=self.__options,
        firefox_binary=self.__firefox_binary,
        firefox_profile=self.__profile,
        executable_path=self.__geckodriver_binary,
        log_path=log_path,
    )
    self.__browser.set_window_size(1920, 1080)
    self.__wait = WebDriverWait(self.__browser, self.__args.timeout)
def set_driver_for_browser(self, browser_name):
    """Return a driver instance for the requested browser.

    Args:
        browser_name: "chrome" or "firefox" (case-insensitive).

    Returns:
        A ``webdriver.Chrome`` or ``webdriver.Firefox`` instance whose driver
        binary is installed on demand by the matching driver manager.

    Raises:
        Exception: if ``browser_name`` is neither chrome nor firefox.
    """
    name = browser_name.lower()
    if name == "chrome":
        driver_cls = webdriver.Chrome
        manager_cls = ChromeDriverManager
        option_cls = ChromeOptions
    elif name == "firefox":
        driver_cls = webdriver.Firefox
        manager_cls = GeckoDriverManager
        option_cls = FirefoxOptions
    else:
        # if browser_name is not chrome neither firefox than raise an exception
        raise Exception("Browser not supported!")

    browser_option = option_cls()
    driver_kwargs = {}
    if self.proxy is not None:
        address = self.proxy.replace(" ", "")
        # selenium-wire reads upstream proxy settings from the "proxy" key;
        # the same keys placed at the top level are not picked up.
        driver_kwargs["seleniumwire_options"] = {
            'proxy': {
                'https': 'https://{}'.format(address),
                'http': 'http://{}'.format(address),
                'no_proxy': 'localhost, 127.0.0.1',
            }
        }
        print("Using: {}".format(self.proxy))

    # The driver manager installs the driver binary and returns its path.
    return driver_cls(
        executable_path=manager_cls().install(),
        options=self.set_properties(browser_option),
        **driver_kwargs
    )
def get_fox(self, proxy):
    """Start a headless Firefox driver routed through ``proxy``.

    Args:
        proxy: value handed to ``self.get_proxy`` to build the selenium-wire
            options.

    Returns:
        The started driver, which is also stored on ``self.drive``.
    """
    wire_options = self.get_proxy(proxy)

    # The headless options used to be built and then dropped; they are now
    # actually passed to the driver.
    firefox_options = Options()
    firefox_options.headless = True

    binary = "/home/ubuntu/Firefox"

    profile = webdriver.FirefoxProfile()
    profile.set_preference("dom.webdriver.enabled", False)
    profile.set_preference('useAutomationExtension', False)
    profile.update_preferences()

    desired = DesiredCapabilities.FIREFOX

    # geckodriver is expected next to this source file.
    gecko_path = os.path.dirname(os.path.abspath(__file__)) + "/geckodriver.exe"

    self.drive = webdriver.Firefox(
        firefox_binary=binary,
        firefox_profile=profile,
        options=firefox_options,
        seleniumwire_options=wire_options,
        desired_capabilities=desired,
        executable_path=gecko_path,
    )
    return self.drive
def _create_driver(self) -> webdriver.Firefox:
    """Build a Firefox driver from this object's profile, proxy and scope settings."""
    firefox_profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in (self.profile_settings or {}).items():
        firefox_profile.set_preference(pref_name, pref_value)
    if self.user_agent:
        firefox_profile.set_preference("general.useragent.override", self.user_agent)
    firefox_profile.update_preferences()

    # Proxy and connection tuning are only configured when a proxy is set.
    wire_options = {}
    if self.proxy:
        wire_options["proxy"] = {"http": self.proxy, "https": self.proxy}
        wire_options["connection_keep_alive"] = True
        wire_options["connection_timeout"] = 180

    firefox_options = Options()
    firefox_options.headless = self.is_headless

    # Without a request scope, the module-level ignore list is applied.
    if not self.request_scope:
        wire_options["ignore_http_methods"] = ignore_http_methods

    browser = webdriver.Firefox(
        firefox_profile,
        options=firefox_options,
        seleniumwire_options=wire_options,
    )
    if self.request_scope:
        browser.scopes = self.request_scope
    return browser
def load_op_matches(self):
    """Load every pending match link and checkpoint progress to CSV.

    Reads the match list, shuffles it, and calls ``self._load_link`` for each
    row whose ``done`` column is 0. A row is flagged done when the loaded
    text contains "oddsdata". After a random 30-50 processed rows the done
    rows are appended to the "done" CSV and the pending rows are written back.
    """
    # Only used by the disabled selenium-wire constructor call below.
    options = {'connection_keep_alive': True, 'connection_timeout': None}
    # self.firefox = webdriver.Firefox(executable_path=r'../lib/geckodriver.exe',seleniumwire_options=options)
    self.firefox = webdriver.Firefox(executable_path=r'../lib/geckodriver.exe')
    self.firefox.scopes = ['fb.oddsportal.com/feed/match/*']

    pending_csv = self.OP_DATA_PATH + self.DATA_FILE
    done_csv = self.OP_DATA_PATH + self.DATA_DONE_FILE

    matches = pd.read_csv(pending_csv, index_col=None)
    matches = matches.sample(frac=1).reset_index(drop=True)

    save_every = random.randint(30, 50)
    processed = 0
    for match in matches[matches['done'] == 0].itertuples():
        match_url = match.link
        match_id = match_url.split('/')[4].split('-')[-1]
        raw_file = self.OP_MATCHES_RAW_PATH + match_id + '.json'
        payload = self._load_link(raw_file, match_url)
        if "oddsdata" in payload:
            matches.at[match.Index, 'done'] = 1

        if processed == save_every:
            print('saving...')
            already_done = pd.read_csv(done_csv)
            still_pending = matches[matches.done == 0]
            newly_done = matches[matches.done == 1]
            # NOTE(review): `matches` is never pruned, so each checkpoint
            # appends every row currently flagged done, including rows
            # appended by an earlier checkpoint. Confirm this is intended.
            pd.concat([already_done, newly_done], axis=0).to_csv(done_csv, index=False)
            still_pending.to_csv(pending_csv, index=False)
            time.sleep(random.uniform(2, 5))
            save_every = random.randint(30, 50)
            processed = 0
        processed += 1
def init_driver(binary_path, binary_type, stop_compression, proxy_add, proxy_port):
    """
    Method to initialize a Selenium driver. Only support Firefox browser for now.

    Args:
        binary_path(str): the path to the 'firefox' executable
        binary_type(str): for now, binary type can only be 'FirefoxBinary'.
        stop_compression: when truthy, clears the accept-encoding preferences
            and disables the devtools cache.
        proxy_add: proxy host written to the ftp/http/socks/ssl proxy
            preferences (only used together with proxy_port).
        proxy_port: proxy port written to the matching ``*_port`` preferences.

    Returns:
        driver(WebDriver): an initialized Selenium WebDriver, or None when
        binary_type is not 'FirefoxBinary'.
    """
    if binary_type != 'FirefoxBinary':
        return None

    firefox_binary = FirefoxBinary(binary_path)
    firefox_options = FirefoxOptions()
    firefox_options.add_argument('--headless')

    if stop_compression:
        for pref, value in (
            ('network.http.accept-encoding', ''),
            ('network.http.accept-encoding.secure', ''),
            ('devtools.cache.disabled', True),
        ):
            firefox_options.set_preference(pref, value)

    if proxy_add and proxy_port:
        # The same host and port are applied to every protocol.
        for protocol in ('ftp', 'http', 'socks', 'ssl'):
            firefox_options.set_preference('network.proxy.' + protocol, proxy_add)
            firefox_options.set_preference('network.proxy.' + protocol + '_port', proxy_port)
        firefox_options.set_preference('network.proxy.type', 1)

    return webdriver.Firefox(firefox_binary=firefox_binary, options=firefox_options)
def make_headless_browser(custom_options=None):
    """Create a headless Firefox browser.

    Args:
        custom_options: optional mapping of selenium-wire options. It is
            copied, never modified. Defaults to no extra options.

    Returns:
        The started ``webdriver.Firefox`` instance.
    """
    # Use the system proxy
    proxy = Proxy()
    proxy.proxy_type = 'SYSTEM'
    fp = FirefoxProfile()
    options = Options()
    # Headless browser
    options.headless = True
    # Disable GPU acceleration
    options.add_argument('--disable-gpu')
    # Page load strategy
    # options.page_load_strategy = 'eager'

    # Copy so that neither the caller's mapping nor a shared default is
    # handed to the driver.
    default_options = {}
    if custom_options:
        default_options.update(custom_options)

    # One driver log file per process.
    log_path = data_root('geckordriver') / f'{os.getpid()}.log'
    return webdriver.Firefox(
        options=options,
        seleniumwire_options=default_options,
        proxy=proxy,
        firefox_profile=fp,
        service_log_path=log_path,
        executable_path=DEFAULT_CONFIG['geckodriver_path'],
    )
def __buildDriver__(self, driver_options):
    """Create the browser driver and the wait/lookup shortcuts on ``self``.

    Args:
        driver_options: iterable of command-line arguments added to the
            Chrome options (unused when the Firefox driver is selected).

    Returns:
        The created driver, also stored as ``self.driver``.
    """
    # Windows needs the ".exe" suffix; every other platform uses the plain
    # name, so the variable is always bound.
    chromedriver = 'chromedriver.exe' if os.name == 'nt' else 'chromedriver'

    if config.DRIVER_NAME == 'Chrome':
        logger.info("Using Chrome Driver ...")
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        options.add_experimental_option('w3c', False)
        for driver_option in driver_options:
            options.add_argument(driver_option)

        if config.PROXY:
            # Copy: DesiredCapabilities.CHROME is a shared class-level dict
            # and must not pick up the logging preference for everyone.
            capabilities = webdriver.DesiredCapabilities.CHROME.copy()
            capabilities['loggingPrefs'] = {'performance': 'ALL'}
            self.driver = webdriver.Chrome(
                desired_capabilities=capabilities, options=options)
        else:
            self.driver = webdriver.Chrome(chromedriver, options=options)
    else:
        logger.info("Using Firefox Driver ...")
        self.driver = webdriver.Firefox()

    self.min_wait = WebDriverWait(self.driver, 5)
    self.max_wait = WebDriverWait(self.driver, 20)

    # Short aliases for the element lookup methods.
    self.els_css = self.driver.find_elements_by_css_selector
    self.el_css = self.driver.find_element_by_css_selector
    self.els_xpath = self.driver.find_elements_by_xpath
    self.el_xpath = self.driver.find_element_by_xpath

    self.driver.set_script_timeout(30)
    return self.driver
def get_regions(account_id, username, password, headless):
    """Sign in to the AWS console and return its region list.

    Args:
        account_id: account id used as the sign-in sub-domain.
        username: value typed into the "username" field.
        password: value typed into the "password" field.
        headless: whether Firefox runs without a visible window.

    Returns:
        The "regions" entry of the JSON stored in the ``content`` attribute
        of the page's "awsc-mezz-data" element.
    """
    driver_options = Options()
    driver_options.headless = headless
    driver = webdriver.Firefox(options=driver_options)
    try:
        wait = webdriver.support.ui.WebDriverWait(driver, 10)
        driver.get("https://{}.signin.aws.amazon.com/console".format(account_id))
        driver.find_element_by_id("username").send_keys(username)
        driver.find_element_by_id("password").send_keys(password)
        driver.find_element_by_id("signin_button").click()

        # until() returns the located element, so no second lookup is needed.
        region_list_element = wait.until(
            lambda drv: drv.find_element_by_name("awsc-mezz-data"))
        region_list_str = region_list_element.get_attribute("content")
        region_list = json.loads(region_list_str)["regions"]
        driver.delete_all_cookies()
    finally:
        # Always end the session, even when sign-in or parsing fails.
        driver.quit()
    return region_list
async def test_access_dashboard(request, url):
    """Load the dashboard in headless Firefox and save the captured HAR.

    The page is retried up to 60 times, five seconds apart, while WebDriver
    errors occur; one final attempt is then made whose error is not caught.
    """
    max_wait = 20  # seconds

    firefox_options = Options()
    firefox_options.headless = True
    firefox_options.log.level = "trace"

    driver_kwargs = {
        "options": firefox_options,
        "seleniumwire_options": {"enable_har": True},
    }
    with webdriver.Firefox(**driver_kwargs) as driver:
        wait = WebDriverWait(driver, max_wait, 1,
                             (JavascriptException, StopIteration))

        def open_dashboard():
            # Navigate, then block until the experiment view is in the DOM.
            driver.get(url)
            wait.until(
                expected_conditions.presence_of_element_located(
                    (By.CLASS_NAME, "experiment-view-container")))

        for _ in range(60):
            try:
                open_dashboard()
            except WebDriverException:
                sleep(5)
            else:
                break
        else:
            # Every retry failed: try once more and let the error propagate.
            open_dashboard()

        har_file = Path(f"/tmp/selenium-{request.node.name}.har")
        har_file.write_text(driver.har)
def query_api(params, recursion_count=0):
    """Query the API and return its ``total`` and ``data`` fields.

    When the reply is not valid JSON, the API URL is opened once in a
    headless Firefox and the query is retried, up to five retries.

    Args:
        params: mapping of query-string parameter names to values.
        recursion_count: number of retries already made (internal).

    Returns:
        ``(total, data)`` from the JSON reply, or ``None`` when every retry
        failed.
    """
    param_string = '?' + ''.join(
        '{}={}&'.format(key, params[key]) for key in params)
    page = s.get(API + param_string).text
    try:
        response = json.loads(page)
    except ValueError:  # json.JSONDecodeError is a ValueError
        options = Options()
        options.headless = True
        options.add_argument("--disable-extensions")
        browser = webdriver.Firefox(options=options)
        try:
            browser.get(API)
            browser.header_overrides = KWIK_HEADER
        finally:
            # quit() ends the session even if the page load failed.
            browser.quit()

        if recursion_count < 5:
            return query_api(params, recursion_count=recursion_count + 1)
        print('Error encountered while querying API.....')
        return None

    total = response.get('total')
    data = response.get('data')
    return total, data
def get_zacks_news_api_params(headless=True):
    """Capture the query parameters of the Google CSE request made by zacks.com.

    Drives the Zacks search page in Firefox, then inspects the requests
    recorded by the driver.

    Args:
        headless: value assigned to the module-level ``options.headless``.

    Returns:
        The parsed query string (as returned by ``parse_qs``) of the last
        recorded request whose path starts with the CSE endpoint, or ``None``
        when no such request was recorded.
    """
    options.headless = headless
    driver = webdriver.Firefox(executable_path='/Users/ryan/geckodriver',
                               firefox_options=options)
    try:
        driver.get('https://www.zacks.com/search.php?q=apple')
        driver.implicitly_wait(1)
        driver.find_element_by_xpath(xpaths['stock-news']).click()
        driver.implicitly_wait(1)
        lastitem = driver.find_element_by_xpath(xpaths['lastitem'])
        # Accessed for its side effect: the property name indicates the
        # element is scrolled into view.
        lastitem.location_once_scrolled_into_view
        driver.find_element_by_xpath(xpaths['stock-4']).click()
        internal_requests = driver.requests
    finally:
        # Never leave the browser running, even when a lookup fails.
        driver.quit()

    cse_requests = [
        req for req in internal_requests
        if req.path.startswith("https://cse.google.com/cse/element/v1")
    ]
    if not cse_requests:
        return None

    query_str = parse.urlparse(cse_requests[-1].path).query
    query_dic = parse.parse_qs(query_str)
    print(query_dic)
    return query_dic
def get_firefox(self, proxy, useragent):
    """Start a headless Firefox driver with an optional proxy and user agent.

    Args:
        proxy: object exposing ``get_init_string()``, or ``None`` for a
            direct connection.
        useragent: user-agent string, or ``None`` to leave the profile
            preference untouched.

    Returns:
        The started driver with its ``User-Agent`` header override set.
    """
    profile = webdriver.FirefoxProfile()
    if useragent is not None:
        profile.set_preference("general.useragent.override", useragent)

    # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True

    options_sel = {
        'connection_timeout': 5,
        'suppress_connection_errors': True
    }
    if proxy is not None:
        options_sel['proxy'] = {
            'http': proxy.get_init_string(),
            # Rewrite only the leading scheme, not a later "http" that may
            # appear in the host or credentials.
            'https': proxy.get_init_string().replace('http', 'https', 1)
        }

    options = Options()
    options.headless = True
    options.add_argument("--window-size=1920,1080")
    options.add_argument('--user-agent={}'.format(useragent))

    binary = FirefoxBinary(self.browser_path)
    driver_ = webdriver.Firefox(capabilities=firefox_capabilities,
                                options=options,
                                firefox_binary=binary,
                                firefox_profile=profile,
                                timeout=5,
                                seleniumwire_options=options_sel)
    driver_.header_overrides = {'User-Agent': useragent}
    return driver_
def loginToTwitter(self):
    """Open the Twitter login page through the local proxy and sign in."""
    wire_options = {
        'proxy': {
            'http': "http://127.0.0.1:7890",
            'https': "http://127.0.0.1:7890",
            "no_proxy": "localhost,127.0.0.1"
        }
    }
    browser = webdriver.Firefox(seleniumwire_options=wire_options)
    self.driver = browser
    browser.get("http://www.twitter.com/login")
    sleep(4)

    def submit(xpath, read_value):
        # Locate the field, replace its content and confirm with RETURN.
        field = self.find_element(xpath)
        field.clear()
        field.send_keys(read_value())
        field.send_keys(Keys.RETURN)

    # Login userName
    submit("//input[@name='text']", getUserName)
    sleep(1)
    # Login password
    submit("//input[@name='password']", getPasswd)
def __init__(self, url=None, reload_every=1000):
    """Start a Firefox driver and, when ``url`` is given, open it.

    Args:
        url: optional start page; stored on the driver as ``base_url``.
        reload_every: stored as ``self._reload_every``.
    """
    self._count_opened_urls = 0
    self._reload_every = reload_every

    browser = webdriver.Firefox()
    self.driver = browser
    if url:
        browser.base_url = url
        browser.get(browser.base_url)
    browser.implicitly_wait(2)
def get_driver():
    """Return a Firefox driver with the user agent overridden and
    ``dom.webdriver.enabled`` switched off."""
    firefox_options = webdriver.FirefoxOptions()
    preferences = (
        ("general.useragent.override", user_agent),
        ("dom.webdriver.enabled", False),
    )
    for pref_name, pref_value in preferences:
        firefox_options.set_preference(pref_name, pref_value)
    # Headless mode is left off: firefox_options.headless = True
    return webdriver.Firefox(executable_path='path_to_geckodriver',
                             options=firefox_options)
def __init__(self, start_url):
    """Start an image-free Firefox driver and collect the season URLs.

    Args:
        start_url: stored as ``self.start_url``.
    """
    # permissions.default.image values:
    #   1 - allow all images, 2 - block all images, 3 - block 3rd party images
    block_all_images = 2
    image_free_profile = webdriver.FirefoxProfile()
    image_free_profile.set_preference("permissions.default.image",
                                      block_all_images)

    self.driver = webdriver.Firefox(firefox_profile=image_free_profile)
    self.start_url = start_url
    self.seasons_urls = self._get_seasons()
def launch_browser(headers=None, user_agent=None, proxy=None, browser_type="Firefox"):
    """Start a selenium-wire browser using a driver binary shipped with the app.

    Args:
        headers: optional header overrides applied to the started browser.
        user_agent: user agent for Firefox; one is generated when omitted.
        proxy: optional upstream proxy URL used for both http and https.
        browser_type: "Firefox" (default); any other value selects Chrome.

    Returns:
        The started driver with a 1920x1080 window. Exits the process when
        no driver could be started.
    """
    options = {}
    if proxy:
        options["proxy"] = {"http": proxy, "https": proxy}

    # In a frozen bundle the driver binaries live in the unpack directory.
    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
        directory = sys._MEIPASS
    else:
        directory = os.path.dirname(__file__)

    driver = None
    if browser_type == "Firefox":
        candidates = [
            os.path.join(directory, name)
            for name in ("geckodriver.exe", "geckodriver")
        ]
        driver_path = next(
            (path for path in candidates if os.path.exists(path)), None)
        if driver_path:
            opts = webdriver.FirefoxOptions()
            # opts.add_argument("--headless")
            profile = webdriver.FirefoxProfile()
            if not user_agent:
                user_agent = generate_user_agent()
            profile.set_preference("general.useragent.override", user_agent)
            driver = webdriver.Firefox(
                firefox_profile=profile,
                executable_path=driver_path,
                options=opts,
                seleniumwire_options=options,
            )
        else:
            message = f"Download geckodriver from https://github.com/mozilla/geckodriver/releases/tag/v0.27.0 and paste it in {directory}"
            input(message)
    else:
        driver_path = os.path.join(directory, "chromedriver.exe")
        opts = webdriver.ChromeOptions()
        # The old "--proxy-server={opts}" argument formatted the options
        # object itself into the flag. The upstream proxy is already passed
        # through seleniumwire_options, so no Chrome flag is added.
        driver = webdriver.Chrome(executable_path=driver_path,
                                  options=opts,
                                  seleniumwire_options=options)

    if not driver:
        input("DRIVER NOT FOUND")
        # sys.exit instead of the exit() builtin, which the site module
        # provides and which may be missing in a frozen app.
        sys.exit(0)

    driver.set_window_size(1920, 1080)
    browser = driver
    if headers:
        browser._client.set_header_overrides(headers=headers)
    return browser
def select_driver(browser):
    """Return a driver for ``browser``.

    "Firefox" gives a default Firefox driver and "Chrome" gives Chrome with
    selenium-wire SSL verification disabled. Any other value falls back to a
    plain Chrome driver.
    """
    chromedriver_path = '/usr/local/bin/chromedriver'
    if browser == "Firefox":
        return webdriver.Firefox()
    if browser == "Chrome":
        return webdriver.Chrome(executable_path=chromedriver_path,
                                seleniumwire_options={'verify_ssl': False})
    return webdriver.Chrome(executable_path=chromedriver_path)
def test_modify_param(self):
    """A parameter override replaces the value sent in the query string."""
    driver = webdriver.Firefox()
    try:
        driver.param_overrides = {'foo': 'baz'}
        driver.get('https://httpbin.org/get?foo=bar')
        request = driver.wait_for_request('https://httpbin.org/get?foo=baz')
        self.assertEqual({'foo': 'baz'}, request.params)
    finally:
        # Close the browser even when the wait or the assertion fails.
        driver.quit()
def test_add_cache_control(self):
    """A ``response:`` header override shows up on the captured response."""
    url = 'https://www.python.org/'
    driver = webdriver.Firefox()
    try:
        driver.header_overrides = {'response:Cache-Control': 'none'}
        driver.get(url)
        request = driver.wait_for_request(url)
        self.assertEqual('none', request.response.headers['Cache-Control'])
    finally:
        # Close the browser even when the wait or the assertion fails.
        driver.quit()
def test_firefox_can_access_requests(self):
    """Requests made by Firefox are captured together with their responses."""
    url = 'https://www.python.org/'
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        request = driver.wait_for_request(url)
        self.assertEqual(request.response.status_code, 200)
        self.assertIn('text/html', request.response.headers['Content-Type'])
    finally:
        # Close the browser even when the wait or an assertion fails.
        driver.quit()
def test_rewrite_url(self):
    """A rewrite rule redirects the python.org request to wikipedia.org."""
    driver = webdriver.Firefox()
    try:
        driver.rewrite_rules = [(r'(https?://)www.python.org/',
                                 r'\1www.wikipedia.org/')]
        driver.get('https://www.python.org/')
        # Should find www.wikipedia.org
        driver.wait_for_request('https://www.wikipedia.org/')
    finally:
        # Close the browser even when the rewritten request never appears.
        driver.quit()
def test_custom_response_handler(self):
    """NOTE: this is being deprecated. Use driver.response_interceptor."""

    def custom(req, req_body, res, res_body):
        print(f'res_body length: {len(res_body)}')

    options = {'custom_response_handler': custom}
    driver = webdriver.Firefox(seleniumwire_options=options)
    try:
        driver.get('https://www.python.org/')
    finally:
        # Close the browser even when the page load fails.
        driver.quit()
def build_driver():
    """Start a headless Firefox behind the scraping proxy and record its session.

    The driver's command-executor URL and session id are written, one per
    line, to ``SELENIUM_SESSION_FILE``.

    Returns:
        The started driver, after the extra tabs have been closed.
    """
    software_names = [SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    # test for prevent block: pin a fixed agent instead of the random one.
    # The value must be the bare UA string, because it is written verbatim
    # to the "general.useragent.override" preference. A "user-agent=" prefix
    # here would become part of the user agent itself.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    # options.add_argument("--window-size=1420,1080")
    options.add_argument("--disable-gpu")
    options.add_argument(f'user-agent={user_agent}')

    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    profile.add_extension(extension='./extensions/adblock_plus-3.11-an+fx.xpi')
    profile.add_extension(
        extension='./extensions/adblock_for_firefox-4.33.0-fx.xpi')

    # NOTE(review): hard-coded credential; consider loading it from the
    # environment or a config file instead of source control.
    API_KEY = '7f3282dc1e35451c7037fa93818b0cef'
    proxy_options = {
        'proxy': {
            'http': 'http://*****:*****@proxy-server.scraperapi.com:8001',
            'https': 'http://*****:*****@proxy-server.scraperapi.com:8001',
            'no_proxy': 'localhost,127.0.0.1'
        }
    }
    driver = webdriver.Firefox(firefox_profile=profile,
                               firefox_binary=None,
                               options=options,
                               seleniumwire_options=proxy_options)
    print("Agent: {}".format(user_agent))

    # "with" closes the file even if a write fails.
    with open(SELENIUM_SESSION_FILE, 'w') as session_file:
        session_file.writelines([
            driver.command_executor._url,
            "\n",
            driver.session_id,
            "\n",
        ])

    time.sleep(5)
    # close other tabs
    ulties.closeOtherTabs(driver)
    return driver
def _firefox_config(self):
    """Return a Firefox driver using the module-level proxies and a
    privacy-oriented profile."""
    wire_options = {'proxy': proxies}
    preferences = (
        ("general.useragent.override", self._user_agent),  # chosen user agent
        ("media.peerconnection.enabled", False),           # disable webrtc
        ("plugin.state.flash", 0),                         # disable flash
        ("general.useragent.locale", "en"),
    )
    profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()  # save settings
    return webdriver.Firefox(firefox_profile=profile,
                             seleniumwire_options=wire_options,
                             executable_path='geckodriver.exe')
def establish_connection():
    """Open Firefox, log in to PodMe and return the logged-in driver.

    Returns:
        The driver, once the "my podcasts" element has become accessible.

    Raises:
        Whatever a failing login step raises; the browser is shut down first.
    """
    driver = webdriver.Firefox(seleniumwire_options={'verify_ssl': False})
    try:
        driver.get(PODME_URL)
        wait_for_access(driver, LOGIN_PAGE, timer=20).click()
        # NOTE(review): credentials are hard-coded; consider reading them
        # from configuration.
        wait_for_access(driver, EMAIL, timer=20).send_keys('*****@*****.**')
        wait_for_access(driver, PASSWORD, timer=20).send_keys('DownloadPodme123')
        wait_for_access(driver, LOGIN, timer=20).click()
        wait_for_access(driver, MY_PODCASTS, timer=20)
    except Exception:
        # The caller never receives the driver on failure, so close it here.
        driver.quit()
        raise
    return driver
def get_access_token(options):
    """
    This function opens the login page and extracts access tokens

    Args:
        options: the options dict which contains login info. Keys read here:
            "gecko_path", "manual", "email", "password" and "debug".

    Returns:
        access_token: the bearer token that will be used to extract activities

    The process exits when the user declines the manual flow (status 0) or
    pastes a token shorter than five characters (status 1).
    """
    # Stays None on the manual path, where no webdriver is started.
    driver = None
    login_success = False
    try:
        if options["gecko_path"] and not options["manual"]:
            info(f"🚗 Starting gecko webdriver")
            driver = webdriver.Firefox(executable_path=options["gecko_path"])
            driver.scopes = [
                ".*nike.*",
            ]
            login_success = login(driver, options["email"], options["password"])
            if options["debug"]:
                debug(f"Saving screenshot from after login")
                with open("website.png", "wb") as f:
                    f.write(driver.get_screenshot_as_png())

        if login_success:
            access_token = extract_token(driver)
        else:
            info(
                f"I will open your web browser and you will have to manually intercept the access tokens.\n"
                f" You can find more details on how to do this over here: https://git.io/nrc-exporter\n"
                f" Press 'y' to open up the login url")
            accept = input()
            if not accept == "y":
                info("You didn't want to continue. Exiting")
                sys.exit(0)
            webbrowser.open_new_tab(MOBILE_LOGIN_URL)
            info(f"Please paste access tokens here: \n")
            access_token = input()
            debug(f"Manually entered access token: {access_token}")
            if len(access_token) < 5:
                error(
                    f"You didn't paste access tokens. Please provide them using -t or --token argument"
                )
                sys.exit(1)
    finally:
        # Runs on normal return, on errors and on sys.exit(), so a started
        # webdriver is never left behind. Skipped when none was started.
        if driver is not None:
            info(
                f"Closing the webdriver. From here on we will be using requests library instead"
            )
            driver.quit()
    return access_token
def test_simple_example(self):
    """Print URL, status code and content type of every captured response."""
    # Create a new instance of the Firefox driver
    driver = webdriver.Firefox()
    try:
        # Go to the Google home page
        driver.get('https://www.google.com')

        # Access requests via the `requests` attribute
        for request in driver.requests:
            if request.response:
                print(request.url, request.response.status_code,
                      request.response.headers['Content-Type'])
    finally:
        # The browser was previously never shut down.
        driver.quit()
def __init__(self, profile=None, visible=False, cache_dir=None):
    '''
    Scraper using selenium

    Args:
        profile: string, path to firefox profile,
            e.g. $HOME/.mozilla/firefox/6h98gbaj.default'
        visible: when False, the browser runs inside a virtual display
            (requires pyvirtualdisplay).
        cache_dir: stored as ``self.cache_dir``.

    Raises:
        ImportError: if ``visible`` is False and pyvirtualdisplay is not
            installed, or if selenium itself is missing.
    '''
    try:
        from pyvirtualdisplay import Display
    except ImportError:
        # Only needed when the browser must be hidden; checked below.
        Display = None

    # Prefer selenium-wire when it is installed.
    try:
        from seleniumwire import webdriver
    except ImportError:
        from selenium import webdriver
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

    logging.getLogger(__name__).addHandler(logging.NullHandler())
    self.urls = []
    self.cache_dir = cache_dir

    if not visible:
        if Display is None:
            raise ImportError(
                'pyvirtualdisplay is required when visible is False')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

    caps = DesiredCapabilities.FIREFOX.copy()
    caps['marionette'] = True

    driver_kwargs = {'capabilities': caps, 'log_path': os.devnull}
    if profile:
        # Build the profile object only when a profile path was given.
        driver_kwargs['firefox_profile'] = webdriver.FirefoxProfile(profile)
    self.browser = webdriver.Firefox(**driver_kwargs)
    self.browser.set_page_load_timeout(15)