def __init__(self, options, recursos="altos"):
    """Start the Chrome driver, tuned to the host's available resources.

    Args:
        options: Chrome options forwarded unchanged to ``webdriver.Chrome``.
        recursos: ``"bajos"`` (low resources) starts Chrome with the default
            capabilities; any other value starts it with the ``"eager"``
            page-load strategy.
    """
    # Original author's note: if the host is a low-resource machine, windows
    # are opened one at a time and closed, instead of several at once.
    driver_path = ChromeDriverManager().install()
    if recursos == "bajos":
        self.driver = webdriver.Chrome(driver_path, options=options)
    else:
        # Work on a copy: DesiredCapabilities.CHROME is a class-level dict
        # shared by every caller in the process.
        caps = DesiredCapabilities.CHROME.copy()
        caps["pageLoadStrategy"] = "eager"
        self.driver = webdriver.Chrome(driver_path,
                                       desired_capabilities=caps,
                                       options=options)
    # NOTE(review): rebinding the local ``options`` here has no effect inside
    # this method; this may be module-level code that was folded into the
    # method -- confirm before removing or moving it.
    options = webdriver.ChromeOptions()
def set_driver():
    """Build and return a Firefox driver that uses a fixed on-disk profile.

    The driver is created with the ``"none"`` page-load strategy.

    Returns:
        The new ``webdriver.Firefox`` instance.
    """
    # Copy first: DesiredCapabilities.FIREFOX is a class-level dict shared by
    # every caller, so writing into it directly leaks this setting globally.
    caps = DesiredCapabilities.FIREFOX.copy()
    caps["pageLoadStrategy"] = "none"  # one of: none, eager, normal

    fp = webdriver.FirefoxProfile(
        '/Users/mk/Library/Application Support/Firefox/Profiles/wn1jjo27.default')
    fp.set_preference('network.proxy.type', 1)  # proxy
    fp.set_preference('network.proxy.http', '127.0.0.1')
    fp.set_preference('network.proxy.http_port', '8087')
    # The last write wins: the proxy type set above is overridden here.
    fp.set_preference('network.proxy.type', 0)  # direct

    opts = webdriver.FirefoxOptions()

    return webdriver.Firefox(firefox_profile=fp, capabilities=caps, options=opts)
def fetcher():
    """Load ``URL`` in Firefox on a virtual display and return the full HTML.

    The page is scrolled to the bottom repeatedly, sleeping ``PAUSE_TIME``
    seconds after each scroll, until ``document.body.scrollHeight`` stops
    changing.

    Returns:
        The page source once scrolling has finished.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        cap = DesiredCapabilities.FIREFOX.copy()
        driver = webdriver.Firefox(
            capabilities=cap,
            executable_path="/home/shahrooz/.local/bin/geckodriver")
        try:
            driver.get(URL)
            print("Got URL")
            while True:
                last_height = driver.execute_script(
                    "return document.body.scrollHeight")
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(PAUSE_TIME)
                new_height = driver.execute_script(
                    "return document.body.scrollHeight")
                # An unchanged height means nothing more was loaded: done.
                if new_height == last_height:
                    break
                print("Scrolling again...")
            return driver.page_source
        finally:
            # quit() ends the whole session, not just the current window.
            driver.quit()
    finally:
        # Always release the virtual display, even if the driver failed.
        display.stop()
def create_browser(self):
    '''Creates a Chromium instance with some basic settings to load data into.

    Returns:
        The new ``webdriver.Chrome`` instance.
    '''
    # Copy so the class-level DesiredCapabilities.CHROME dict, which is shared
    # by every caller, is left untouched.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities["pageLoadStrategy"] = "normal"

    options = webdriver.ChromeOptions()
    prefs = {
        'profile.managed_default_content_settings.images': 2,
        # NOTE(review): 'user-data-dir' is passed as a profile preference
        # here; confirm this has the intended effect.
        'user-data-dir': 'C/',
    }
    options.add_experimental_option("prefs", prefs)

    return webdriver.Chrome(chrome_options=options,
                            desired_capabilities=capabilities)
def init_chrome_driver(self):
    """Create the headless Chrome driver once and keep it on ``self.chrome_driver``.

    Does nothing when ``self.chrome_driver`` is already set. If the driver
    cannot be created, a message is printed and ``self.exit("init_chrome")``
    is called.
    """
    if self.chrome_driver:
        return

    print("Initializing chrome driver")
    try:
        # Copy so the shared class-level CHROME dict is not modified.
        caps = DesiredCapabilities.CHROME.copy()
        caps["pageLoadStrategy"] = "eager"

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--log-level=3')
        chrome_options.add_argument('--disable-logging')
        chrome_options.add_argument('--headless')

        # The capabilities were previously built but never handed to Chrome,
        # so the "eager" strategy was silently ignored.
        self.chrome_driver = webdriver.Chrome(
            executable_path="./Driver/chromedriver.exe",
            desired_capabilities=caps,
            options=chrome_options)
    except Exception:
        # Narrower than a bare except: lets KeyboardInterrupt/SystemExit through.
        print("Couldn't init chrome drivers")
        self.exit("init_chrome")
def __init__(self):
    """Open a headless Firefox session and prepare the Chaojiying client."""
    self.login_url = "https://passport.bilibili.com/login"

    firefox_caps = DesiredCapabilities().FIREFOX
    headless_opts = Options()
    headless_opts.headless = True
    self.browser = webdriver.Firefox(
        executable_path="/Users/chloeji/geckodriver",
        capabilities=firefox_caps,
        options=headless_opts,
    )
    # Shared explicit wait on the browser, with a 60 timeout.
    self.driver_wait = WebDriverWait(self.browser, 60)

    # Set the Chaojiying user name, password and software ID.
    self.chaojiying = ChaojiyingClient(
        CHAOJIYING_USERNAME_,
        CHAOJIYING_PASSWORD_,
        CHAOJIYING_SOFT_ID_,
    )
def getSeleniumDriverForFirefox(fireFoxBinaryPath, geckoDriverPath,
                                windowSizeXAxis, windowSizeYAxis):
    """Create a headless Firefox driver with a fixed window size.

    Args:
        fireFoxBinaryPath: Firefox binary assigned to the options' ``binary``.
        geckoDriverPath: Passed as the driver's ``executable_path``.
        windowSizeXAxis: Window width passed to ``set_window_size``.
        windowSizeYAxis: Window height passed to ``set_window_size``.

    Returns:
        The configured ``webdriver.Firefox`` instance.
    """
    options = Options()
    options.set_headless(headless=True)
    # Use the parameters: the previous code read the undefined names
    # ``binary`` and ``geckodriver`` and ignored both path arguments.
    options.binary = fireFoxBinaryPath
    options.add_argument('--ignore-certificate-errors')

    # Copy so the shared class-level FIREFOX dict is not modified.
    cap = DesiredCapabilities.FIREFOX.copy()
    cap["marionette"] = True  # optional

    driver = webdriver.Firefox(firefox_options=options,
                               capabilities=cap,
                               executable_path=geckoDriverPath)
    driver.set_window_size(windowSizeXAxis, windowSizeYAxis)
    return driver
def launch_first_firefox(url):
    """Start Firefox, open ``url`` and report the session's identifiers.

    Args:
        url: Address loaded after the browser has started.

    Returns:
        A ``(session_id, url_exec)`` tuple: the driver's session id and the
        URL of its command executor.
    """
    profile = FirefoxProfile()
    firefox_caps = DesiredCapabilities().FIREFOX
    download_prefs = (
        ('browser.download.manager.showWhenStarting', False),
        ('browser.download.dir', os.getcwd()),
        ("browser.helperApps.neverAsk.openFile", "application/xls"),
    )
    for pref_name, pref_value in download_prefs:
        profile.set_preference(pref_name, pref_value)

    driver = Firefox(
        firefox_profile=profile,
        firefox_binary=FirefoxBinary('K://firefox.exe'),
        capabilities=firefox_caps,
        executable_path="K://geckodriver.exe",
    )
    # Capture both identifiers before navigating, as the original did.
    identifiers = (driver.session_id, driver.command_executor._url)
    driver.get(url)
    return identifiers
def _create_new_browser(self):
    """Start Chrome with a randomly chosen Chrome user-agent string.

    The driver uses the ``"none"`` page-load strategy. The method sleeps
    for one second after the browser is created.

    Returns:
        The new ``webdriver.Chrome`` instance.
    """
    from random import choice
    from selenium.webdriver.chrome.options import Options

    # Copy so the shared class-level CHROME dict is not modified.
    capa = DesiredCapabilities.CHROME.copy()
    capa["pageLoadStrategy"] = "none"

    cr_options = Options()

    # User-Agent components. AppleWebKit and Safari always carry the same
    # version, so only the system and Chrome versions are picked at random.
    system_versions = ["Windows NT 10.0; Win64; x64"]
    chrome_versions = ["70.0.3538.102", "71.0.3578.98"]
    webkit_version = "537.36"

    # "Gecko" was previously misspelled "Geoko" in the emitted string.
    user_agent_arg = (
        "user-agent=Mozilla/5.0 ({system}) "
        "AppleWebKit/{webkit} (KHTML, like Gecko) "
        "Chrome/{chrome} Safari/{webkit}"
    ).format(system=choice(system_versions),
             webkit=webkit_version,
             chrome=choice(chrome_versions))
    cr_options.add_argument(user_agent_arg)

    browser = webdriver.Chrome(
        executable_path="/usr/lib/chromium-browser/chromedriver",
        desired_capabilities=capa,
        chrome_options=cr_options,
    )
    time.sleep(1)
    return browser
def open_browser(self, url=Inicializar.URL, browser=Inicializar.Browser):
    """Start the requested browser, open ``url`` and record the main window.

    Args:
        url: Address to load once the driver is running.
        browser: ``"EDGE"``, ``"CHROME"`` or ``"FIREFOX"``; for any other
            value no driver is started.

    Returns:
        The started driver, or ``None`` when ``browser`` is not recognised.
    """
    print("Directorio Base: " + Inicializar.basedir)
    self.windows = {}
    separator = "---------------"
    print(separator)
    print(browser)
    print(separator)

    drivers_dir = Inicializar.basedir + "\\drivers\\"

    if browser == "EDGE":
        self.driver = webdriver.Edge(
            executable_path=drivers_dir + "msedgedriver.exe")
        self.driver.maximize_window()
        self.driver.implicitly_wait(10)
    elif browser == "CHROME":
        chrome_opts = OpcionesChrome()
        chrome_opts.add_argument('start-maximized')
        self.driver = webdriver.Chrome(
            options=chrome_opts,
            executable_path=drivers_dir + "chromedriver.exe")
        self.driver.implicitly_wait(10)
    elif browser == "FIREFOX":
        firefox_caps = DesiredCapabilities().FIREFOX
        self.driver = webdriver.Firefox(
            capabilities=firefox_caps,
            executable_path=drivers_dir + "geckodriver.exe")
        self.driver.implicitly_wait(10)
        self.driver.maximize_window()
    else:
        return None

    # Common tail: load the page and remember the first window handle.
    self.driver.get(url)
    self.principal = self.driver.window_handles[0]
    self.windows = {'Principal': self.driver.window_handles[0]}
    if browser == "FIREFOX":
        # Only the Firefox path initialises this counter.
        self.nWindows = 0
    return self.driver
def load(self):
    """Initialise this Chrome driver with the bundled chromedriver for ``OS_NAME``.

    The driver binary is chosen from ``./bin`` according to ``OS_NAME``
    (``"ubuntu"``, ``"windows"`` or ``"macos"``). After start-up the page-load
    timeout and implicit wait are set, ``is_loaded`` is flagged and the
    window is maximised.
    """
    chrome_options = webdriver.ChromeOptions()
    for flag in (
            '--no-sandbox',
            '--disable-dev-shm-usage',
            '--test-type',
            '--disable-fre',
            '--no-default-browser-check',
            '--no-first-run',
            "--ignore-ssl-errors=yes",
            '--ignore-certificate-errors',
            "--disable-features=IsolateOrigins,site-per-process",
            "--disable-site-isolation-for-policy",
            "--disable-site-isolation-trials",
    ):
        chrome_options.add_argument(flag)

    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "none"

    driver_path = None
    if OS_NAME == "ubuntu":
        chrome_options.add_argument(
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
        )
        driver_path = os.path.join(".", "bin", "chromedriver_ubuntu")
    elif OS_NAME == "windows":
        driver_path = os.path.join(".", "bin", "chromedriver_windows.exe")
    elif OS_NAME == "macos":
        chrome_options.binary_location = os.path.join(
            ".", "bin", "Google Chrome.app/Contents/MacOS/Google Chrome")
        driver_path = os.path.join(".", "bin", "chromedriver")

    # As before, an unrecognised OS_NAME skips driver initialisation and
    # falls through to the calls below.
    if driver_path is not None:
        webdriver.Chrome.__init__(self,
                                  driver_path,
                                  options=chrome_options,
                                  desired_capabilities=caps)

    self.set_page_load_timeout(180)
    self.is_loaded = True
    self.implicitly_wait(10)
    sleep(2)
    self.maximize_window()
def setup(extensions: List[str] = None):
    """Start Chrome, optionally with extensions, and open Instagram.

    Args:
        extensions: Extension files passed one by one to ``add_extension``;
            ``None`` loads no extensions.

    Returns:
        The ``webdriver.Chrome`` instance, already pointed at
        ``https://www.instagram.com/``.
    """
    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "eager"

    chrome_options = Options()
    chrome_options.add_argument("--disable-web-security")
    if extensions is not None:
        for extension in extensions:
            chrome_options.add_extension(extension)

    driver = webdriver.Chrome(desired_capabilities=caps,
                              executable_path='./chromedriver',
                              options=chrome_options)
    driver.get('https://www.instagram.com/')
    return driver
def __init__(self, browser_type=None):
    """Create the driver for the requested browser.

    Args:
        browser_type: Falsy for a default Chrome driver, or ``"firefox"``
            (case-insensitive) for a headless Firefox driver.

    Raises:
        NameError: If ``browser_type`` is any other value.
    """
    if not browser_type:
        self.driver = webdriver.Chrome()
    elif browser_type.lower() == 'firefox':
        # Copy so the shared class-level FIREFOX dict is not modified.
        cap = DesiredCapabilities.FIREFOX.copy()
        cap["marionette"] = False
        firefox_options = FOptions()
        firefox_options.add_argument("--headless")
        self.driver = webdriver.Firefox(capabilities=cap,
                                        options=firefox_options)
    else:
        raise NameError(f'{browser_type} is not accepted.')
def __init__(self):
    """Start the Firefox driver and initialise the scraper's state."""
    options = Options()
    # Headless mode (no visible window) is currently disabled.

    # Copy so the shared class-level FIREFOX dict is not modified.
    caps = DesiredCapabilities.FIREFOX.copy()
    caps["pageLoadStrategy"] = "none"  # scrape page prematurely

    # A collection of all the scraped user data held in a dictionary.
    self.all_data = []

    # TODO: The executable_path will have to be reconfigured for the
    # current location in instagram_scrape/webdrivers/geckodriver.exe
    self.driver = Firefox(executable_path='webdrivers/geckodriver.exe',
                          options=options,
                          capabilities=caps)  # driver to access webpages
    self.username = None
def __init__(self, url, worker_id = -1, verbose=False, rewardCritic=None):
    """Set up the environment's action list, spaces and (optionally) a browser.

    Args:
        url: Stored on ``self.url``.
        worker_id: Worker index; a Chrome driver is only started when it is
            greater than -1. Also used to name the picture file.
        verbose: Stored on ``self.verbose``.
        rewardCritic: Stored on ``self.rewardCritic``.
    """
    # Copy so the shared class-level CHROME dict is not modified.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities["pageLoadStrategy"] = "none"
    capabilities['loggingPrefs'] = { 'browser':'ALL' }

    # One possible action per line of hrefs.txt; the context manager closes
    # the file even if reading fails.
    with open('hrefs.txt', 'r', encoding="utf-8") as f:
        self.possible_actions = f.read().splitlines()

    self.url = url
    self.worker_idx = worker_id
    self.picname = 'pic' + str(worker_id) + '.png'
    self.observation_space = np.zeros((128, 128, 3), dtype=np.float32)
    self.action_space = np.zeros(len(self.possible_actions), dtype=np.float32)
    self.verbose = verbose
    if worker_id > -1:
        self.driver = webdriver.Chrome(desired_capabilities=capabilities)
    self.rewardCritic = rewardCritic
def random_driver(loadstrat="normal", images=True, visible=True):
    """Start Chrome with a random user agent.

    Args:
        loadstrat: Value used for the ``pageLoadStrategy`` capability.
        images: When false, the image content-setting preference is set to 2.
        visible: When false, Chrome is started headless with a 1200x600 window.

    Returns:
        The new ``webdriver.Chrome`` instance.
    """
    # Copy: writing ``loadstrat`` into the shared class-level CHROME dict
    # would leak one call's strategy into every later driver.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = loadstrat

    opts = Options()
    if not images:
        prefs = {"profile.managed_default_content_settings.images": 2}
        opts.add_experimental_option("prefs", prefs)
    if not visible:
        opts.add_argument('headless')
        opts.add_argument('window-size=1200x600')

    useragent = random_user_agent()
    opts.add_argument("user-agent=" + useragent)

    driver = webdriver.Chrome(desired_capabilities=caps,
                              executable_path=local_directory + '/chromedriver',
                              chrome_options=opts)
    return driver
def getWebDriver(self, URL):
    """Start a headless Firefox driver and load ``URL``.

    Failures are logged rather than raised.

    Args:
        URL: Address to open.

    Returns:
        The driver, or ``None`` if it could not be created. If the driver was
        created but loading ``URL`` failed, the driver is still returned.
    """
    driver = None
    options = Options()
    options.set_headless(headless=True)
    options.binary = self.browser

    # Copy so the shared class-level FIREFOX dict is not modified.
    cap = DesiredCapabilities.FIREFOX.copy()
    cap["marionette"] = True

    try:
        driver = Firefox(firefox_options=options,
                         capabilities=cap,
                         executable_path=self.webDriver)
        driver.get(URL)
    except Exception:
        # Narrower than a bare except: lets KeyboardInterrupt/SystemExit through.
        logger.error(
            "Failed to generate driver to parse {}, driver: {}".format(
                URL, driver))
    return driver
def __init__(self, dbName, CollectionName, searchWordsFileName, url):
    """Start Chrome and open the MongoDB collection used by this object.

    Args:
        dbName: Name of the database looked up on the Mongo client.
        CollectionName: Name of the collection looked up in that database.
        searchWordsFileName: Stored on ``self.searchWordsFileName``.
        url: Stored on ``self.url``.
    """
    # Initiate the driver.
    options = webdriver.ChromeOptions()

    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    # "none": do not wait for the full page load ("normal" would wait).
    caps["pageLoadStrategy"] = "none"
    self.driver = webdriver.Chrome(
        desired_capabilities=caps,
        executable_path='/Users/lezardvaleth/Documents/Python/chromedriver',
        options=options)

    self.client = MongoClient()
    self.DB = self.client[dbName]
    # The parameter is ``CollectionName``; the previous code read the
    # undefined name ``collectionName``.
    self.Collection = self.DB[CollectionName]
    self.searchWordsFileName = searchWordsFileName
    self.url = url
def __init__(self, tcp_ip='localhost', tcp_port=5005, buffer_size=20):
    """Start a maximised Chrome window, then block until one TCP client connects.

    Args:
        tcp_ip: Address the listening socket binds to.
        tcp_port: Port the listening socket binds to.
        buffer_size: Stored on ``self.buffer_size``.
    """
    self.buffer_size = buffer_size

    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "none"

    options = webdriver.chrome.options.Options()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-setuid-sandbox")
    options.add_argument("--disable-extensions")
    self.driver = webdriver.Chrome(desired_capabilities=caps,
                                   chrome_options=options)
    self.driver.maximize_window()

    tcpconnection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    tcpconnection.bind((tcp_ip, tcp_port))
    tcpconnection.listen(1)
    # accept() blocks here until a client connects.
    self.conn, self.addr = tcpconnection.accept()
    self.last_length = 10
def get_firefox(download_dir='', log_dir=''):
    '''
    Returns a Firefox WebDriver that accepts untrusted certificates, will
    not ask to resume from crashes, does not use cache, uses the specified
    download directory and logfile, and downloads files automatically.

    :param download_dir: str - path of the download directory
    :param log_dir: str - path of the logfile
    :return: WebDriver object
    '''
    # New firefox profile
    profile = webdriver.FirefoxProfile()

    # Accept untrusted certs and untrusted issuers (testing systems usually
    # don't have trusted certs)
    profile.accept_untrusted_certs = True
    profile.default_preferences['webdriver_assume_untrusted_issuer'] = False
    profile.assume_untrusted_cert_issuer = False

    # Disable safe mode
    # NOTE(review): the value is passed as the string '-1'; confirm whether
    # this preference should be the integer -1 instead.
    profile.set_preference('toolkit.startup.max_resumed_crashes', '-1')

    # Disable cache
    profile.set_preference('network.http.use-cache', False)

    # Set download dir and don't ask for confirmation
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", download_dir)
    profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                           'application/octet-stream')

    # Set the logfile
    profile.set_preference("webdriver.log.file", log_dir)

    # Save preferences
    profile.update_preferences()

    # Get a private copy of the capabilities and set the SSL parameters.
    # DesiredCapabilities.FIREFOX is a class-level dict shared by every
    # caller, so it must not be edited in place.
    capabilities = DesiredCapabilities.FIREFOX.copy()
    capabilities['acceptSslCerts'] = True
    capabilities['acceptInsecureCerts'] = True
    capabilities['handleAlerts'] = True
    # Marionette is switched off here (original note: needed to use
    # Selenium 3 with Firefox <=47).
    capabilities['marionette'] = False

    # Start Firefox with our profile and capabilities and return the WebDriver
    return webdriver.Firefox(firefox_profile=profile,
                             capabilities=capabilities)
def main1(defaultlink):
    """Scrape every event page reachable from the index at ``defaultlink``.

    The index page yields a list of link texts; each is appended to the
    "start_with" URL, and every event link found on the resulting page is
    handed to ``scrape_page`` together with a running counter.

    Args:
        defaultlink: URL of the index page.

    Returns:
        A list of the values returned by ``scrape_page``, or ``None`` when
        the index page request itself fails.
    """
    ret = []
    # Copy so the shared class-level FIREFOX dict is not modified.
    cap = DesiredCapabilities.FIREFOX.copy()
    cap["marionette"] = False

    response = bot.getRequest(url=defaultlink,
                              cap=cap,
                              exec_path="geckodriver.exe",  # path of driver
                              flag=1)
    if not response["status"]["code"]:
        print(response['status']['message'])
        return None

    driver = response["data"]
    driver.maximize_window()
    j = 1
    links = [
        e.text for e in driver.find_elements_by_xpath(
            '//ul[@class="esf-results-alpha-list"]//li[3]//a')
    ]
    driver.quit()

    for link_text in links:
        url = "https://www.comexposium.com/Events-Index/Find-an-event/(start_with)/"
        url = url + link_text
        response = bot.getRequest(url=url,
                                  cap=cap,
                                  exec_path="geckodriver.exe",
                                  flag=1)
        if response["status"]["code"]:
            driver = response["data"]
            try:
                links_on_each_page = [
                    e.get_attribute('href')
                    for e in driver.find_elements_by_xpath(
                        '//h3[@class="catal-ex-item-title"]//a')
                ]
                for page_link in links_on_each_page:
                    data_dic = scrape_page(page_link, j)
                    ret.append(data_dic)
                    j = j + 1
            except Exception:
                # Best effort: a page that fails to scrape is skipped.
                pass
            finally:
                # Always release the browser, even on interruption.
                driver.quit()
        else:
            print(response['status']['message'])
    return ret
def openLoginPage(self):
    """Open ``self.url`` in Firefox and wait for the signup page body.

    Waits up to 60 for ``<body class='signup-page'>`` to become visible.
    """
    options = Options()
    options.headless = False

    # Copy so the shared class-level FIREFOX dict is not modified.
    caps = DesiredCapabilities.FIREFOX.copy()
    caps["pageLoadStrategy"] = "eager"

    # Create a new Firefox session.
    self.driver = webdriver.Firefox(
        options=options,
        capabilities=caps,
        executable_path=app.config['PATH_GECKODRIVER'],
        service_log_path=app.config['PATH_GECKODRIVER_LOG'])
    self.driver.get(self.url)

    xpathSignupBody = "//body[@class='signup-page']"
    checkSignupBody = EC.visibility_of_element_located(
        (By.XPATH, xpathSignupBody))
    WebDriverWait(self.driver, 60).until(checkSignupBody)
def __init__(self, chrome_driver):
    """Initialise the bot's credentials and start an incognito Chrome driver.

    Args:
        chrome_driver: Passed to ``webdriver.Chrome`` as ``executable_path``.
    """
    super(Bot, self).__init__()
    self.password = "******"
    self.email = "email here"
    extension = './extension_0_6_0_0.crx'

    # NOTE(review): this first options object is replaced a few lines below,
    # so the headless flag and the extension never reach Chrome. It is kept
    # because add_extension still runs on it; confirm whether the two option
    # sets were meant to be merged.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_extension(extension)

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--incognito")

    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "normal"
    self.driver = webdriver.Chrome(desired_capabilities=caps,
                                   executable_path=chrome_driver,
                                   chrome_options=chrome_options)
def __init__(self, product_type, u, p): self.host = HOSTS[product_type] _option = webdriver.ChromeOptions() # _option.add_argument('--headless') _option.add_argument('--disable-gpu') # 本地web_driver # self.web_driver = webdriver.Chrome(chrome_options=_option) try: self.web_driver = webdriver.Remote( command_executor='http://127.0.0.1:4444/wd/hub', desired_capabilities=DesiredCapabilities().CHROME.copy()) except MaxRetryError: print "请检查selenium是否正常:", 'http://127.0.0.1:4444/wd/hub' sys.exit(1) self.wait = WebDriverWait(self.web_driver, 5) self.u = u self.p = p.strip()
def get_driver(self, ip=None):
    """Create a maximised Chrome driver configured from this instance.

    Args:
        ip: Optional proxy address; when given, Chrome is started with
            ``--proxy-server=http://<ip>``.

    Returns:
        The new ``webdriver.Chrome`` instance, with its page-load timeout
        set to ``self.pageloadtimeout``.
    """
    chrome_option = webdriver.ChromeOptions()
    if ip is not None:
        chrome_option.add_argument("--proxy-server=http://%s" % (ip))
    if self.headless:
        chrome_option.add_argument("--headless")
    chrome_option.add_argument("--no-sandbox")

    # Copy: writing an instance-specific strategy into the shared
    # class-level CHROME dict would affect every other driver.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = self.pageloadstrategy

    args = {"desired_capabilities": caps, "chrome_options": chrome_option}
    driver = webdriver.Chrome(**args)
    driver.maximize_window()
    driver.set_page_load_timeout(self.pageloadtimeout)
    return driver
def __init__(self, webdriver=None, debug=False):
    """Wrap a driver, creating a default Firefox one when none is given.

    Args:
        webdriver: Driver instance to wrap; ``None`` creates a Firefox
            driver with marionette disabled and a ``--headless`` argument.
        debug: Stored on ``self.debug``.
    """
    if webdriver is None:
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        # Copy so the shared class-level FIREFOX dict is not modified.
        cap = DesiredCapabilities.FIREFOX.copy()
        cap['args'] = ['--headless']
        cap["marionette"] = False
        webdriver = Firefox(capabilities=cap)

    self.__closed = False
    self.first_tab = True
    self.__fullscreen = False
    self.tabs = {}
    self.debug = debug
    self.f = webdriver
def __init__(self, headless, seller, is_next_url_full, scroll):
    """Start the Chrome driver and reset the item counters.

    Args:
        headless: When true, Chrome is started headless with a 1200x600 window.
        seller: Stored on ``self.seller``.
        is_next_url_full: Stored on ``self.is_next_url_full``.
        scroll: Stored on ``self.scroll``.
    """
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('headless')
        options.add_argument('window-size=1200x600')

    # Copy so the shared class-level CHROME dict is not modified.
    caps = DesiredCapabilities.CHROME.copy()
    caps["pageLoadStrategy"] = "none"

    self.driver = webdriver.Chrome(executable_path=chrome_driver_path,
                                   chrome_options=options,
                                   desired_capabilities=caps)
    self.driver.implicitly_wait(10)  # seconds

    self.seller = seller
    self.scroll = scroll
    self.is_next_url_full = is_next_url_full
    self.items_checked = 0
    self.items_added = 0
def login(self):
    """Open the login page in Firefox and submit the stored credentials.

    A default Firefox driver is tried first; if that fails, a second
    attempt is made with marionette disabled.

    Returns:
        The driver, after the login form has been submitted.
    """
    try:
        # Used in the security management center environment.
        driver = webdriver.Firefox()
    except Exception:
        # Used in the "213" environment. Narrowed from a bare except so
        # KeyboardInterrupt/SystemExit are not swallowed.
        # Copy so the shared class-level FIREFOX dict is not modified.
        cap = DesiredCapabilities.FIREFOX.copy()
        cap["marionette"] = False
        driver = webdriver.Firefox(capabilities=cap)

    driver.get(self.login_url)
    time.sleep(1)
    driver.find_element_by_xpath('//input[@id="loginName"]').send_keys(
        self.username)
    driver.find_element_by_xpath('//input[@id="loginPassword"]').send_keys(
        self.password)
    driver.find_element_by_xpath('//a[@id="loginAction"]').click()
    time.sleep(5)
    return driver
def flush(browser, n): ua = DesiredCapabilities().IPHONE for i in range(n): if browser.lower() == 'firefox': driver = webdriver.Firefox() elif browser.lower() == 'chrome': driver = webdriver.Chrome(executable_path=driver_path + 'chromedriver.exe') elif browser.lower() == 'ie': driver = webdriver.Ie(executable_path=driver_path + 'IEDriverServer.exe') elif browser.lower() == 'phantomjs': driver = webdriver.PhantomJS(executable_path=driver_path + 'phantomjs.exe', desired_capabilities=ua) driver.get('http://m.baidu.com') driver.find_element_by_id('index-kw').send_keys(random.choice(KEYWORDS), Keys.ENTER) clk(driver, url='csdn') sleep(1) print driver.find_element_by_class_name('article_t').text, print driver.find_element_by_xpath('//p[@class="date"]/i[2]').text driver.close()
def test_multiple_users_can_start_lists_at_different_urls(self):
    """Two users starting lists get distinct URLs and never see each other's items."""
    # Edith starts a new to-do list
    self.browser.get(self.live_server_url)
    inputbox = self.browser.find_element_by_id('id_new_item')
    inputbox.send_keys('Buy peacock feathers')
    inputbox.send_keys(Keys.ENTER)
    self.wait_for_row_in_list_table('1: Buy peacock feathers')

    # She notices that her list has a unique URL
    edith_list_url = self.browser.current_url
    self.assertRegex(edith_list_url, '/lists/.+')

    # Now a new user, Francis, comes along to the site.

    ## We use a new browser session to make sure that no information
    ## of Edith's is coming through from cookies etc
    self.browser.quit()
    ## Copy the capabilities so the shared class-level FIREFOX dict is
    ## not modified for every other test.
    cap = DesiredCapabilities.FIREFOX.copy()
    cap["marionette"] = False
    self.browser = webdriver.Firefox(capabilities=cap)

    # Francis visits the home page. There is no sign of Edith's
    # list
    self.browser.get(self.live_server_url)
    page_text = self.browser.find_element_by_tag_name('body').text
    self.assertNotIn('Buy peacock feathers', page_text)
    self.assertNotIn('make a fly', page_text)

    # Francis starts a new list by entering a new item. He
    # is less interesting than Edith...
    inputbox = self.browser.find_element_by_id('id_new_item')
    inputbox.send_keys('Buy milk')
    inputbox.send_keys(Keys.ENTER)
    self.wait_for_row_in_list_table('1: Buy milk')

    # Francis gets his own unique URL
    francis_list_url = self.browser.current_url
    self.assertRegex(francis_list_url, '/lists/.+')
    self.assertNotIn(francis_list_url, edith_list_url)

    # Again, there is no trace of Edith's list
    page_text = self.browser.find_element_by_tag_name('body').text
    self.assertNotIn('Buy peacock feathers', page_text)
    self.assertIn('Buy milk', page_text)