def test_load_dividends_table():
    """Loading the dividends table should land on the AKRN dividends URL."""
    firefox_opts = options.Options()
    firefox_opts.headless = True
    expected_url = 'https://www.conomy.ru/emitent/akron/akrn-div'

    with webdriver.Firefox(options=firefox_opts) as browser:
        browser.get('https://www.conomy.ru/emitent/akron')
        conomy_ru.load_dividends_table(browser)
        # Checked inside the ``with`` because the driver is gone afterwards.
        assert browser.current_url == expected_url
def __init__(self):
    """Store the login settings and start a headless Firefox driver."""
    firefox_opts = options.Options()
    firefox_opts.add_argument('-headless')

    self.LOGIN_URL = "http://example.python-scraping.com/user/login"
    self.LOGIN_NAME = "*****@*****.**"
    self.LOGIN_PASSWORD = "******"

    # The options object used to be built and then discarded
    # (``options=None``), so the headless flag never took effect.
    self.driver = webdriver.Firefox(options=firefox_opts)
def test_load_ticker_page():
    """Loading the AKRN ticker page should end up on the issuer page."""
    firefox_opts = options.Options()
    firefox_opts.headless = True
    expected_url = 'https://www.conomy.ru/emitent/akron'

    with webdriver.Firefox(options=firefox_opts) as browser:
        conomy_ru.load_ticker_page(browser, 'AKRN')
        # Wait for the dividends menu before inspecting the URL.
        conomy_ru.xpath_await(browser, conomy_ru.DIVIDENDS_MENU)
        assert browser.current_url == expected_url
def setup_driver() -> webdriver.Firefox:
    """Create a headless Firefox driver.

    The geckodriver executable path is obtained from
    ``GeckoDriverManager().install()`` and wrapped in a ``Service``.

    Returns:
        The started Firefox driver.
    """
    # Lower-case local: ``Options`` reads like the class it is built from.
    firefox_opts = options.Options()
    firefox_opts.headless = True
    gecko_service = Service(GeckoDriverManager().install())
    return webdriver.Firefox(options=firefox_opts, service=gecko_service)
def new_driver(self):
    """Build a webdriver for ``self.browser`` (``'chrome'`` or ``'firefox'``).

    Executables are looked up with ``which``. Raises ``Exception`` when the
    matching driver executable is not found or the browser is unsupported.
    """
    if self.browser == 'chrome':
        chrome_opts = chrome_options.Options()
        chrome_opts.headless = self.headless
        chrome_opts.binary_location = which("chrome") or which("chromium")  # type: ignore
        driver_path = which('chromedriver')
        if not driver_path:
            raise Exception("chromedriver not found in PATH")
        return webdriver.Chrome(options=chrome_opts, executable_path=driver_path)

    if self.browser == 'firefox':
        firefox_opts = firefox_options.Options()
        firefox_opts.headless = self.headless
        firefox_bin = FirefoxBinary(which("firefox"))
        driver_path = which('geckodriver')
        if not driver_path:
            raise Exception("geckodriver not found in PATH")
        # Fall back to a local log file when none is configured.
        log_path = self.driver_log_file or "geckodriver.log"
        return webdriver.Firefox(
            options=firefox_opts,
            firefox_binary=firefox_bin,
            executable_path=driver_path,
            service_log_path=log_path,
        )

    raise Exception(f"Unsupported browser: {self.browser}")
def test_xpath_await():
    """``xpath_await`` should return the search field element."""
    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        browser.get(conomy_ru.SEARCH_URL)
        search_field = conomy_ru.xpath_await(browser, conomy_ru.SEARCH_FIELD)
        placeholder = search_field.get_attribute('placeholder')
        assert placeholder == 'Поиск эмитентов по названию или тикеру'
def test_xpath_await_no_element():
    """``xpath_await`` should raise ``TimeoutException`` for a missing node."""
    firefox_opts = options.Options()
    firefox_opts.headless = True
    missing_xpath = conomy_ru.SEARCH_FIELD + '/div'

    with webdriver.Firefox(options=firefox_opts) as browser, \
            pytest.raises(TimeoutException) as error:
        browser.get(conomy_ru.SEARCH_URL)
        conomy_ru.xpath_await(browser, missing_xpath, 1)

    # Inspected once the context has captured the exception.
    assert error.type == TimeoutException
def get_html(ticker: str):
    """Return the HTML source of the page with dividend data."""
    firefox_opts = options.Options()
    firefox_opts.headless = True

    with webdriver.Firefox(options=firefox_opts) as browser:
        # NOTE(review): ``test_load_ticker_page`` calls
        # ``load_ticker_page(driver, ticker)`` - confirm which argument
        # order the helper actually expects.
        load_ticker_page(ticker, browser)
        load_dividends_table(browser)
        return browser.page_source
def init_firefox(self):
    """Start a headless Firefox driver and store it in ``self.drivers``.

    The driver is registered under the ``'firefox'`` key with a page-load
    timeout of 60 seconds.
    """
    firefox_opts = firefox.Options()
    # ``Options.set_headless()`` is deprecated; passing the ``-headless``
    # argument is the equivalent that works across Selenium versions.
    firefox_opts.add_argument('-headless')

    browser = webdriver.Firefox(options=firefox_opts)
    browser.set_page_load_timeout(60)
    self.drivers['firefox'] = browser
def __init__(self, cache=RedisCache(), delay=2):
    """Set up the cache, throttle, headless Firefox driver and CSV writer.

    :param cache: cache object stored on ``self.cache``.
    :param delay: value passed to ``Throttle``.
    """
    # NOTE(review): the ``RedisCache()`` default is evaluated once, when the
    # function is defined, and is therefore shared by every instance that
    # does not pass ``cache``. Left unchanged to keep the signature stable.
    firefox_opts = options.Options()
    firefox_opts.add_argument('-headless')

    self.cache = cache
    self.throttle = Throttle(delay)
    self.driver = webdriver.Firefox(options=firefox_opts)
    self.driver.set_page_load_timeout(20)

    # Keep the file handle on the instance so it can be flushed and closed
    # explicitly; before, it was reachable only through the writer.
    self._csv_file = open("./data/data.csv", "w", newline="")
    self.writer = csv.writer(self._csv_file)
def driverInit(headLess=False):
    """Create the webdriver object.

    With ``headLess`` set, Firefox is started with headless options;
    otherwise it is started with its defaults.
    """
    if not headLess:
        return webdriver.Firefox()

    firefox_opts = options.Options()
    firefox_opts.headless = True
    return webdriver.Firefox(options=firefox_opts)
def get_firefox():
    """Create a headless Firefox driver that uses a fixed local profile."""
    firefox_opts = firefox_options.Options()
    firefox_opts.headless = True

    profile = webdriver.FirefoxProfile(
        'C:/Users/Tu Le/AppData/Roaming/Mozilla/Firefox/Profiles/ejmvt067.default-1554669393168'
    )
    return webdriver.Firefox(
        options=firefox_opts,
        firefox_profile=profile,
        executable_path=PATH_FIREFOX_DRIVER,
    )
def main():
    """Run the routes forever, sleeping a randomised interval between runs."""
    random.seed()
    profile = webdriver.FirefoxProfile('/home/jerry/.mozilla/firefox/huqgok2n.default')
    firefox_opts = options.Options()
    firefox_opts.headless = True

    # run indefinitely
    while True:
        run_routes(profile, firefox_opts)
        # Jitter the base delay by up to a minute in either direction.
        pause = SLEEP_BASE + random.randint(-60, 60)
        print('Sleeping %d s.' % pause)
        time.sleep(pause)
def __init__(self, headless_mode: bool = False) -> None:
    """
    :param headless_mode: whether the browser runs without being rendered.
        By default (headless_mode = False) the browser is rendered.
    :return: None
    """
    firefox_opts = options.Options()
    firefox_opts.headless = headless_mode

    self._options: options.Options = firefox_opts
    self._browser: WebDriver = webdriver.Firefox(options=firefox_opts)
def __init__(self, selenium_webdriver=None, headless=False):  # noqa: D107
    self.element_class = Element

    if not selenium_webdriver:
        # No webdriver supplied: configure and use a Firefox webdriver.
        firefox_opts = firefox_options.Options()
        firefox_opts.headless = headless
        selenium_webdriver = selenium.webdriver.Firefox(
            executable_path="geckodriver",
            options=firefox_opts,
        )

    # Either the caller's webdriver or the Firefox one created above.
    self.selenium_webdriver = selenium_webdriver
def start_session(self, capabilities, browser_profile=None):
    """
    Override of the ``start_session`` method.

    Validates ``capabilities``, adds the encoded profile to it when a
    profile is given, then sets ``self.capabilities`` from default Firefox
    options and ``self.session_id`` from ``self.r_session_id``.
    """
    if not isinstance(capabilities, dict):
        raise InvalidArgumentException("Capabilities must be a dictionary")

    if browser_profile:
        if "moz:firefoxOptions" not in capabilities:
            capabilities['firefox_profile'] = browser_profile.encoded
        else:
            capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded

    self.capabilities = f_option.Options().to_capabilities()
    self.session_id = self.r_session_id
    self.w3c = False
def test_nondefault_browser():
    """``Browser`` should work with an externally created webdriver."""
    # Create and configure the webdriver handed to the browser.
    firefox_opts = firefox_options.Options()
    firefox_opts.headless = True
    driver = selenium.webdriver.Firefox(
        executable_path="geckodriver",
        options=firefox_opts,
    )
    browser = Browser(driver)

    # Test the browser.
    browser.visit(utilities.build_url("test_actions.html"))
    assert browser.title == "Actions test page"

    # Stop the browser.
    browser.quit()
def setUp(self):
    """Create fixtures, start headless Firefox and log the user in.

    The session cookie of a force-logged-in test client is copied into
    the browser, which is then refreshed.
    """
    self.world = baker.make(models.Place, site_type=enums.SiteTypes.WORLD)
    relative_url = reverse(self.resolver)
    self.url = f'{self.live_server_url}{relative_url}'

    self.options = firefox_options.Options()
    self.options.headless = True
    self.browser = webdriver.Firefox(options=self.options)

    # Log the user in through the test client to obtain a session cookie.
    self.user = baker.make(get_user_model())
    self.client.force_login(self.user)
    self.session_cookie = self.client.cookies['sessionid']

    # The page is opened before the cookie is added, then refreshed.
    self.browser.get(self.url)
    cookie = {
        'name': 'sessionid',
        'value': self.session_cookie.value,
        'secure': False,
        'path': '/',
    }
    self.browser.add_cookie(cookie)
    self.browser.refresh()
def __init__(
    self,
    headless_browser: bool = True,
    download_dir: str = "data",
):
    """Start a Firefox driver that saves CSV downloads into ``download_dir``.

    :param headless_browser: when true, the browser is started headless.
    :param download_dir: directory for downloads; created if missing and
        stored as an absolute path in ``self.download_dir``.
    """
    self.options = options.Options()
    self.options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

    self.headless_browser = headless_browser
    if self.headless_browser:
        self.options.headless = True

    # ``makedirs(exist_ok=True)`` also creates missing parent directories
    # and avoids the check-then-create race of ``exists`` + ``mkdir``.
    os.makedirs(download_dir, exist_ok=True)
    self.download_dir = os.path.abspath(download_dir)

    # don't use default Downloads directory
    self.options.set_preference("browser.download.folderList", 2)
    # set download directory
    self.options.set_preference("browser.download.dir", self.download_dir)

    # The constructor either returns a driver or raises, so no ``None``
    # check is needed before configuring the timeouts.
    self.driver = webdriver.Firefox(options=self.options)
    self.driver.implicitly_wait(12)
    self.driver.set_page_load_timeout(30)
def __init__(self, timeout: int = 0, headless_mode: bool = False) -> None:
    """
    Class responsible for extracting chapter information for the manga
    hosted at "https://mangalivre.net", whose pages are generated with
    JavaScript.

    :param timeout: time in seconds the browser waits for the chapter
        information to appear.
    :param headless_mode: whether the browser runs without being rendered.
        By default the browser is rendered.
    :return: None
    """
    firefox_opts = options.Options()
    firefox_opts.headless = headless_mode

    self._options: options.Options = firefox_opts
    self._browser: WebDriver = webdriver.Firefox(options=firefox_opts)
    self._web_driver_wait = WebDriverWait(driver=self._browser, timeout=timeout)
class GetCreditCardRewards(unittest.TestCase):
    """Scrape the current reward categories of three cards and store them.

    NOTE: all of the work happens in the class body, i.e. when the class is
    defined; no test methods are declared here.
    """

    # boot webdriver
    Options = options.Options()
    Options.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=Options)

    try:
        # find current rewards categories
        driver.get(
            "https://www.discover.com/credit-cards/cashback-bonus/cashback-calendar.html"
        )
        discoverItRewards = driver.find_element_by_css_selector(
            ".offer-enroll .offer-name").text

        driver.get(
            "https://www.citi.com/credit-cards/credit-cards-citi/citi.action?ID=dividend-quarterly-offer"
        )
        citiRewardsList = driver.find_elements_by_css_selector(
            ".selectedProductRow .descriptions")
        citi = ""
        for citiRewards in citiRewardsList:
            if citi == "":
                citi = citiRewards.text
            else:
                citi = citi + " and " + citiRewards.text

        # chase is harder because they are all images so it has to be done
        # by quarter and alt text
        currentMonth = datetime.now().month
        currentQuarter = (currentMonth - 1) // 3  # zero-based quarter index
        driver.get("https://creditcards.chase.com/freedom/calendar")
        chaseRewardsList = driver.find_elements_by_css_selector(
            ".quarter-box-wrapper .quarter-box")
        chase = chaseRewardsList[currentQuarter].find_element_by_css_selector(
            "img")
        chase = chase.get_attribute("alt")
    finally:
        # The browser used to be left running; always shut it down, even
        # when one of the page lookups above fails.
        driver.quit()

    # write to sqlite
    updateCategories('Discover', discoverItRewards)
    updateCategories('Citi', citi)
    updateCategories('Chase', chase)
import re

# Start a headless browser: Chrome first, Firefox as the fallback.
browser = None
try:
    chrome_options = options.Options()
    chrome_options.add_argument("--headless")
    browser = webdriver.Chrome(options=chrome_options)
    print("Chrome Found.")
    browser.maximize_window()
except Exception:
    print("Chrome not Found. Moving to Firefox.")

# Only fall back to Firefox when Chrome did not start. The fallback used to
# run unconditionally, replacing (and leaking) a working Chrome session.
if browser is None:
    try:
        firefox_options = fir_options.Options()
        firefox_options.add_argument("--headless")
        browser = webdriver.Firefox(options=firefox_options)
        print("Firefox Found.")
        browser.maximize_window()
    except Exception:
        print("Firefox not found. Get one of these")

# Stop here with a clear message instead of failing later on ``None.get``.
if browser is None:
    raise SystemExit("No usable browser (Chrome or Firefox) could be started.")

# url for album to download.
url = input("Enter the URL for the album to download: ")
flac_check = re.findall('FLAC', requests.get(url).text)
browser.get(url)
time.sleep(2)
def _start_headless_browser():
    """Return a headless Chrome driver, else Firefox, else ``None``."""
    browser = None
    try:
        chrome_options = options.Options()
        chrome_options.add_argument("--headless")
        browser = webdriver.Chrome(options=chrome_options)
        print("Chrome Found.")
        browser.maximize_window()
    except Exception:
        print("Chrome not Found. Moving to Firefox.")

    if browser is None:
        try:
            firefox_options = fir_options.Options()
            firefox_options.add_argument("--headless")
            browser = webdriver.Firefox(options=firefox_options)
            print("Firefox Found.")
            browser.maximize_window()
        except Exception:
            print("Firefox not found. Get one of these")
    return browser


def _collect_file_links(browser):
    """Return the hrefs of the file links on the page loaded in *browser*."""
    plain_links = browser.find_elements_by_xpath("//a[@class='file']")
    view_links = browser.find_elements_by_xpath("//a[@class='file view']")
    return [link.get_attribute('href') for link in (*plain_links, *view_links)]


def scraping_files_and_folders():
    """Crawl a folder listing and write the folder and file URLs to disk.

    Asks for the start URL, walks every folder link found, and writes the
    folder URLs to ``tempFolder.txt`` and ``debug_file.txt`` and the file
    URLs to ``tempFile.txt``.

    Raises:
        RuntimeError: if neither Chrome nor Firefox could be started.
    """
    url = input("Enter the URL for the Finisher to download: ")
    browser = _start_headless_browser()
    if browser is None:
        # Fail with a clear message instead of crashing later on ``None``.
        raise RuntimeError("Neither Chrome nor Firefox could be started.")

    try:
        with open('tempFolder.txt', 'w+') as folder_url, \
                open('tempFile.txt', 'w+') as file_url:
            # Ensure that the page is loaded correctly
            ensure_page_loaded(browser, url)
            file_url_list = _collect_file_links(browser)

            # Slot 0 holds the first folder link of the start page; the
            # crawl below visits entries from index 1 onwards.
            first_folder = browser.find_elements_by_xpath("//a[@class='folder']")[0]
            folder_url_list = [first_folder.get_attribute('href')]

            index = 1
            while True:
                time.sleep(3)
                for candidate in browser.find_elements_by_class_name("folder"):
                    # Only "folder" elements that contain a <div> are queued.
                    if candidate.find_elements_by_tag_name("div"):
                        folder_url_list.append(candidate.get_attribute('href'))

                if index >= len(folder_url_list):
                    break

                # Ensure that the page is loaded correctly
                print('Scraping Folder...')
                ensure_page_loaded(browser, folder_url_list[index])
                file_url_list.extend(_collect_file_links(browser))
                index += 1

            folder_url.writelines(str(link) + '\n' for link in folder_url_list)
            file_url.writelines(str(link) + '\n' for link in file_url_list)

        # This file used to be opened and never closed.
        with open('debug_file.txt', 'w+') as debug_file:
            debug_file.writelines(str(link) + '\n' for link in folder_url_list)

        time.sleep(3)
    finally:
        # Release the browser and its driver process even when the crawl
        # fails part-way through.
        browser.quit()
def firefox():
    """Create a Firefox driver started with the ``--start-maximized`` argument."""
    firefox_opts = FirefoxOptions.Options()
    # NOTE(review): ``--start-maximized`` looks like a Chrome switch; confirm
    # that Firefox honours it.
    firefox_opts.add_argument('--start-maximized')
    return webdriver.Firefox(options=firefox_opts)
def PDBReader(file, timeout=600):
    """
    Accesses http://charmm-gui.org and uses the PDB Reader.

    Parameters
    ----------
    file : str
        Path to the input PDB file.
    timeout : int
        Timeout in seconds.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.

    Raises
    ------
    SystemError
        If neither Chrome nor Firefox can be started.
    ConnectionError
        If no download link becomes visible within the timeout.
    """

    def autoClicker(element_id, attempts):
        # deals with some rare cases of an unclickable element
        for _ in range(attempts):
            try:
                driver.find_element_by_id(element_id).click()
                return
            except _exceptions.WebDriverException:
                _time.sleep(1)
        # Last attempt: let the error propagate to the caller.
        driver.find_element_by_id(element_id).click()

    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    # The driver used to be quit only on success; release it on every path.
    try:
        _logging.info("Accessing http://www.charmm-gui.org ...")
        driver.get("http://www.charmm-gui.org/?doc=input/pdbreader")

        pdb_element = driver.find_element_by_name("file")
        pdb_element.send_keys(file)

        pdb_radio = driver.find_element_by_xpath("//input[@name='pdb_format' and "
                                                 "@value='PDB']")
        pdb_radio.click()

        autoClicker("nextBtn", 60)

        # could add some support for options. For now, we just go with the
        # defaults.
        wait = _wait.WebDriverWait(driver, timeout)
        wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
        autoClicker("nextBtn", 60)

        wait.until(_EC.element_to_be_clickable((_by.By.ID, "nextBtn")))
        autoClicker("nextBtn", 60)

        try:
            _logging.info("Retrieving files...")
            wait.until(
                _EC.visibility_of_any_elements_located(
                    (_by.By.CLASS_NAME, "download")))
        # ``WebDriverWait.until`` raises selenium's ``TimeoutException``, not
        # the builtin ``TimeoutError``; catching only the builtin meant this
        # handler never fired. The builtin is kept for compatibility.
        except (_exceptions.TimeoutException, TimeoutError) as err:
            raise ConnectionError("Could not retrieve any files. Please increase "
                                  "the maximum timeout or try again later.") from err

        _logging.info("Downloading TGZ archive...")
        filebase = _os.path.splitext(file)[0]
        tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]

        # NOTE(review): ``verify=False`` disables TLS certificate checks.
        response = driver.request('POST', tgz_file.get_attribute("href"),
                                  verify=False, stream=True)

        # Separate name so the ``file`` argument is not shadowed.
        with open(filebase + "_CHARMM.tgz", "wb") as archive:
            archive.write(response.raw.read())
    finally:
        driver.quit()

    return filebase + "_CHARMM.tgz"
def ligandReader(file, timeout=60, find_similar_residues=False):
    """
    Accesses http://charmm-gui.org and uses the Ligand Reader.

    Parameters
    ----------
    file : str
        Path to the input ligand file.
    timeout : int
        Timeout in seconds.
    find_similar_residues : bool
        Whether to tick the "Find similar residues" checkbox before
        searching.

    Returns
    -------
    filename_output : str
        The absolute path to the output TGZ archive.

    Raises
    ------
    SystemError
        If neither Chrome nor Firefox can be started.
    ConnectionError
        If no download link becomes visible within the timeout.
    """
    file = _os.path.abspath(file)
    options = _options.Options()
    options.headless = True

    try:
        driver = _seleniumrequests.Chrome(options=options)
    except _exceptions.WebDriverException:
        try:
            driver = _seleniumrequests.Firefox(options=options)
        except _exceptions.WebDriverException:
            raise SystemError("Need either Chrome or Firefox for CHARMM-GUI "
                              "functionality.")

    # The driver used to be quit only on success; release it on every path.
    try:
        _logging.info("Accessing http://www.charmm-gui.org ...")
        driver.get("http://www.charmm-gui.org/?doc=input/ligandrm")

        pdb_element = driver.find_element_by_name("file2")
        pdb_element.send_keys(file)

        upload_button = driver.find_element_by_xpath(
            "//input[@type='button' and @value='Upload MOL/MOL2/SDF']")
        upload_button.click()

        driver.switch_to.alert.accept()
        _time.sleep(5)

        if find_similar_residues:
            driver.find_element_by_name("simi").click()

        driver.find_element_by_id("nextBtn").click()

        # could add some support for options. For now, we just go with the
        # defaults.
        driver.find_element_by_id("nextBtn").click()

        try:
            _logging.info("Retrieving files...")
            wait = _wait.WebDriverWait(driver, timeout)
            wait.until(
                _EC.visibility_of_any_elements_located(
                    (_by.By.CLASS_NAME, "download")))
        # ``WebDriverWait.until`` raises selenium's ``TimeoutException``, not
        # the builtin ``TimeoutError``; catching only the builtin meant this
        # handler never fired. The builtin is kept for compatibility.
        except (_exceptions.TimeoutException, TimeoutError) as err:
            raise ConnectionError("Could not retrieve any files. Please increase "
                                  "the maximum timeout or try again later.") from err

        _logging.info("Downloading TGZ archive...")
        filebase = _os.path.splitext(file)[0]
        tgz_file = driver.find_elements_by_partial_link_text(".tgz")[0]

        # NOTE(review): ``verify=False`` disables TLS certificate checks.
        response = driver.request('POST', tgz_file.get_attribute("href"),
                                  verify=False, stream=True)

        # Separate name so the ``file`` argument is not shadowed.
        with open(filebase + "_CHARMM.tgz", "wb") as archive:
            archive.write(response.raw.read())
    finally:
        driver.quit()

    return filebase + "_CHARMM.tgz"
sys.exit(4) elif code == 5: # The User-Settings file is removed if the event/giveaway informations are invalid # So it can be created again from scratch os.remove("User-Settings.txt") input("ERROR (5): User-Settings.txt Sanity check failed, please try again later.\n You may close this console or press any key\n") browser.close() sys.exit(5) if __name__ == "__main__": print("Starting Tasks\n DO NOT CLOSE THIS CONSOLE") # Setting up the selenium environment - Using Geckodriver # geckodriver.exe is expected to be found at the same directory as the program GECKODRIVER_PATH = os.path.join(os.getcwd(), "geckodriver.exe") WINDOW_SIZE = "1920,1080" firefox_options = option.Options() firefox_options.add_argument("--headless") firefox_options.add_argument("--window-size=%s" % WINDOW_SIZE) firefox_options.add_argument("disable-gpu") firefox_options.add_argument("--disable-notifications") caps = DC.DesiredCapabilities().FIREFOX.copy() caps["pageLoadStrategy"] = "eager" try: browser = Webdriver.Firefox(executable_path = GECKODRIVER_PATH, options = firefox_options, desired_capabilities = caps) except: #Exit code 4 is when geckodriver.exe isn't found in the directory (usually) exitScript(4) #Setting up a few explicit wait classes, for standard and extended loading respectively
import scrapy
from selenium import webdriver
from selenium.webdriver.firefox import options

# Headless Firefox options shared by every spider instance.
opt = options.Options()
opt.headless = True


class ArticleSpider(scrapy.Spider):
    """Spider that opens each response URL in a headless Firefox driver."""

    name = 'article'
    start_urls = ['https://www.ssga.com/us/en/individual/etfs/fund-finder']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the Firefox driver."""
        # Forward to the base class so spider arguments keep working; the
        # override used to skip ``Spider.__init__`` and accept no arguments.
        super().__init__(*args, **kwargs)
        self.driver = webdriver.Firefox(options=opt)

    def parse(self, response):
        """Load the response URL in the driver and print the ``td`` nodes."""
        self.driver.get(response.url)
        # NOTE(review): the cells come from the Scrapy response, not from
        # the page loaded in the driver - confirm this is intended.
        print(response.css('td').extract())

    def closed(self, reason):
        """Quit the driver when the spider closes; it was never released."""
        self.driver.quit()
def setUpClass(cls):
    """Run the parent class setup, then start a non-headless driver on ``cls.driver``."""
    super().setUpClass()

    visible_opts = options.Options()
    # Headless mode is explicitly switched off for this driver.
    visible_opts.headless = False
    cls.driver = WebDriver(options=visible_opts)