def browser(request):
    """Create the WebDriver selected by the pytest command-line options.

    Options read from ``request.config``:
        --browser    "chrome", "firefox", "opera" or "edge"
        --headless   truthy to start Chrome/Firefox/Opera without a window
        --maximized  truthy to maximize the window after start-up

    The driver is quit automatically through a finalizer registered on
    ``request``.

    Returns:
        The started WebDriver instance.

    Raises:
        ValueError: if ``--browser`` names a browser that is not handled here.
    """
    browser_name = request.config.getoption("--browser")
    headless = request.config.getoption("--headless")
    maximized = request.config.getoption("--maximized")

    if browser_name == "chrome":
        options = webdriver.ChromeOptions()
        if headless:
            options.headless = True
        driver = webdriver.Chrome(options=options)
    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        if headless:
            options.headless = True
        driver = webdriver.Firefox(options=options)
    elif browser_name == "opera":
        options = OperaOptions()
        if headless:
            options.headless = True
        driver = webdriver.Opera(options=options)
    elif browser_name == "edge":
        # NOTE(review): machine-specific driver location; --headless is not
        # applied to Edge here.
        driver = webdriver.Edge(
            "C:\\Users\\Mikhail\\Downloads\\driver\\msedgedriver.exe")
    else:
        # Previously an unknown name left the driver as None, which either
        # crashed on maximize_window() or was handed to the test as None.
        raise ValueError("Browser is not supported: {!r}".format(browser_name))

    # Make sure the browser process does not outlive the test.
    request.addfinalizer(driver.quit)

    if maximized:
        driver.maximize_window()
    return driver
def create_local_driver(request):
    """Start a local Chrome, Opera or Firefox driver from pytest options.

    Reads ``--drivers_path``, ``--headless`` and ``--browser`` from
    ``request.config`` and registers ``driver.quit`` as a finalizer.

    Returns:
        The started WebDriver instance.

    Raises:
        NotImplementedError: when headless mode is requested for Opera.
        ValueError: when the browser name is not one of the three above.
    """
    config = request.config
    drivers_path = config.getoption("--drivers_path")
    headless = config.getoption("--headless")
    browser = config.getoption("--browser")

    if browser not in ("chrome", "opera", "firefox"):
        raise ValueError("Browser is not supported")

    if browser == "opera":
        if headless:
            raise NotImplementedError("This mode is not supported")
        opera_opts = Options()
        driver = webdriver.Opera(
            executable_path=drivers_path + "/operadriver",
            options=opera_opts)
    elif browser == "chrome":
        chrome_opts = webdriver.ChromeOptions()
        chrome_opts.headless = headless
        driver = webdriver.Chrome(
            executable_path=drivers_path + "/chromedriver",
            options=chrome_opts)
    else:
        firefox_opts = webdriver.FirefoxOptions()
        firefox_opts.headless = headless
        driver = webdriver.Firefox(
            executable_path=drivers_path + "/geckodriver",
            options=firefox_opts)

    request.addfinalizer(driver.quit)
    return driver
def driver(browser):
    """Start a headless, maximized WebDriver and publish it as ``constants.DRIVER``.

    ``browser`` selects "firefox" or "opera"; any other value starts Chrome
    using the driver binary at ``homework_Selenium/driver/chromedriver``.

    Yields:
        Nothing; consumers read the driver from ``constants.DRIVER``.
        The driver stored there is quit once the generator is resumed or closed.
    """
    if browser == "firefox":
        firefox_options = FirefoxOptions()
        # A bare "headless" argument is not a Firefox switch; Firefox needs
        # the dashed form.
        firefox_options.add_argument("-headless")
        web_driver = webdriver.Firefox(options=firefox_options)
    elif browser == "opera":
        opera_options = OperaOptions()
        opera_options.add_argument("headless")
        web_driver = webdriver.Opera(options=opera_options)
    else:
        chrome_options = ChromeOptions()
        chrome_options.add_argument("headless")
        web_driver = webdriver.Chrome(
            executable_path='homework_Selenium/driver/chromedriver',
            options=chrome_options)
    web_driver.maximize_window()

    constants.DRIVER = web_driver
    try:
        yield
    finally:
        # The old guard `if sys.exc_info():` was always true (it tests a
        # non-empty tuple), so quitting was already unconditional.
        constants.DRIVER.quit()
class DriverBase(object):
    """Holds the Opera options for a session and, once opened, its WebDriver."""

    def __init__(self, config, profile):
        """Prepare Opera options from ``config`` without starting the browser.

        Args:
            config: mapping providing ``'opera_path'``, the folder that
                contains one sub-folder per installed Opera version.
            profile: string appended to the ``user-data-dir=`` argument.
        """
        self.config = config
        self.options = Options()
        #self.options.add_argument("lang=en-US")
        self.options.add_argument("mute-audio")
        version = self.choose_version(self.config['opera_path'])
        self.options.binary_location = os.path.join(
            self.config['opera_path'], version, 'opera.exe')
        self.options.add_argument('user-data-dir=' + profile)
        self.d = None  # instance of the webdriver

    def _elem_to_str(self, e):
        """Return the ``outerHTML`` attribute of element ``e``."""
        return e.get_attribute('outerHTML')

    def _print_all_elems(self, e):
        """Print the outer HTML of every descendant of element ``e``."""
        for x in e.find_elements_by_css_selector("*"):
            print(self._elem_to_str(x))

    def choose_version(self, folder):
        """Return the name of the sub-folder of ``folder`` with the highest version.

        Only directories whose name starts with a dotted number (for example
        ``74.0.3911.107``) are considered. Versions are compared numerically,
        component by component, so ``10.0`` ranks above ``9.0``; a plain
        string sort would rank them the other way round.

        Raises:
            IndexError: if ``folder`` contains no version-like directory.
        """
        candidates = []
        with os.scandir(folder) as entries:
            for entry in entries:
                if not entry.is_dir():
                    continue
                match = re.match(r'\d+(?:\.\d+)*', entry.name)
                if match is None:
                    continue
                key = tuple(int(part) for part in match.group().split('.'))
                candidates.append((key, entry.name))
        return sorted(candidates)[-1][1]

    def open_browser(self):
        """Start Opera with the prepared options and keep the driver in ``self.d``."""
        self.d = webdriver.Opera(options=self.options)
def getBrowser(self):
    """Return the module-wide Selenium browser, creating it on first use.

    The driver is stored in the global ``_browser``. Which driver is started
    depends on ``self.browser_type`` (an ``EBrowser`` member). If the type
    matches none of the handled members, ``_browser`` stays ``None`` and
    ``None`` is returned.
    """
    global _browser
    if _browser is not None:
        return _browser

    # Use selenium
    logging.info("Creating Selenium webdriver using " + str(self.browser_type))
    kind = self.browser_type

    if kind == EBrowser.FIREFOX:
        binary = FirefoxBinary(self.firefox_binary)
        # see http://selenium-python.readthedocs.io/faq.html#how-to-auto-save-files-using-custom-firefox-profile
        profile = (webdriver.FirefoxProfile(self.firefox_profile_path)
                   if self.browser_user_profile
                   else webdriver.FirefoxProfile())
        _browser = webdriver.Firefox(firefox_binary=binary,
                                     firefox_profile=profile)
        self.sleep(1)
        _browser.set_window_position(0, 0)

    elif kind == EBrowser.PHANTOMJS:
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        useragent = ua.random
        capabilities["phantomjs.page.settings.userAgent"] = useragent
        logging.info("Useragent='" + useragent + "'")
        _browser = webdriver.PhantomJS(executable_path=self.phantomjs_path,
                                       desired_capabilities=capabilities)
        self.sleep(1)
        _browser.set_window_size(1920, 1080)
        self.sleep(1)

    elif kind == EBrowser.CHROME:
        opts = ChromeOptions()
        opts.add_argument("--start-maximized")
        opts.binary_location = self.chrome_binary
        _browser = webdriver.Chrome(chrome_options=opts)
        self.sleep(3)

    elif kind == EBrowser.CHROME_HEADLESS:
        opts = ChromeOptions()
        opts.add_argument("--headless")
        size = self.chrome_headless_screen_size
        opts.add_argument("--window-size={},{}".format(size[0], size[1]))
        opts.binary_location = self.chrome_binary
        _browser = webdriver.Chrome(chrome_options=opts)
        self.sleep(1)

    elif kind == EBrowser.OPERA:
        opts = OperaOptions()
        opts.binary_location = r"C:\Program Files\Opera\launcher.exe"
        _browser = webdriver.Opera(opera_options=opts)
        self.sleep(5)

    return _browser
def get(self):
    """Start an Opera driver, apply this object's window size and position, and return it.

    Uses ``self.binary_location`` for the browser binary and
    ``self.driver_path`` for the driver executable.
    """
    opera_options = OperaOptions()
    opera_options.binary_location = self.binary_location
    capabilities = DesiredCapabilities.OPERA.copy()

    opera = webdriver.Opera(
        executable_path=self.driver_path,
        desired_capabilities=capabilities,
        options=opera_options,
    )
    self.set_window_size(opera)
    self.set_window_position(opera)
    return opera
def opera_second():
    """Open the IdentityIQ login page in Opera and log in with ``acc``.

    The started driver is stored in the module-level ``driver``.
    """
    global driver

    profile = 'C:/Users/user/Desktop/idiq/bin/second/Opera Software/Opera Stable'
    # NOTE(review): "{username}" below is passed literally; the string is not
    # an f-string and is never formatted - confirm the intended path.
    launch_arguments = (
        'user-data-dir=' + profile,
        'private',
        "--kiosk-printing",
        "download.default_directory=C:/Users/{username}/Desktop/",
    )

    options = Options()
    for argument in launch_arguments:
        options.add_argument(argument)

    driver = webdriver.Opera(options=options)
    driver.get('https://www.identityiq.com/login.aspx')
    time.sleep(2)  # fixed pause before logging in

    login, password = acc[0], acc[1]
    IDIQ.logon(login, password)
def __init__(self, config, profile):
    """Build the Opera options for this session; the browser is not started.

    Args:
        config: mapping providing ``'opera_path'``; ``choose_version`` picks
            the version sub-folder whose ``opera.exe`` is used.
        profile: string appended to the ``user-data-dir=`` argument.
    """
    self.config = config

    opts = Options()
    self.options = opts
    # opts.add_argument("lang=en-US")
    opts.add_argument("mute-audio")

    opera_dir = self.config['opera_path']
    newest = self.choose_version(opera_dir)
    opts.binary_location = os.path.join(opera_dir, newest, 'opera.exe')
    opts.add_argument('user-data-dir=' + profile)

    # instance of the webdriver
    self.d = None
def test_cartCost(self):
    """Check that the cart total equals the sum of the two added items' prices."""
    opts = Options()
    opts.headless = True  # run without a graphical interface
    assert opts.headless

    browser = Opera(options=opts)

    def read_price():
        # The price text ends with a 4-character suffix that is cut off
        # before converting to int.
        return int(browser.find_element_by_class_name('styled-price').text[:-4])

    # Quit the browser even when a lookup or the assertion fails; previously
    # it was never closed.
    try:
        browser.get('https://luch.by/kollektsii/volat-2-0/728377624/')
        item1Price = read_price()
        browser.find_element_by_class_name('button_add').click()

        browser.get('https://luch.by/kollektsii/classic/76730567/')
        item2Price = read_price()
        browser.find_element_by_class_name('button_add').click()

        browser.get('https://luch.by/cart/')
        total = read_price()
        self.assertEqual(total, item1Price + item2Price)
    finally:
        browser.quit()
def browser(request):
    """Yield a maximized WebDriver for the browser named by ``--browser``.

    Chrome starts with a window; Firefox and Opera start headless. The driver
    is quit after the consumer is done with it.

    Raises:
        ValueError: if ``--browser`` is not "chrome", "firefox" or "opera".
    """
    browser_name = request.config.getoption("--browser")

    if browser_name == "chrome":
        options = webdriver.ChromeOptions()
        # options.headless = True
        driver = webdriver.Chrome(options=options)
    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options)
    elif browser_name == "opera":
        options = OperaOptions()
        options.headless = True
        driver = webdriver.Opera(options=options)
    else:
        # Previously this fell through with driver = None and crashed with an
        # AttributeError on maximize_window().
        raise ValueError("Browser is not supported: {!r}".format(browser_name))

    try:
        driver.maximize_window()
        yield driver
    finally:
        driver.quit()
def dragonfly(cls,
              target_url='',
              params={},
              headers={},
              cookies={},
              method="get",
              return_params=False,
              for_cookies=False,
              proxy_pwd="",
              proxy_user="",
              proxy_host="dyn.horocn.com",
              proxy_port="50000"):
    """Log in to the easygo map page through a proxied browser and return its cookies.

    ``params``, ``headers``, ``cookies``, ``method``, ``return_params`` and
    ``for_cookies`` are accepted for interface compatibility; this function
    does not use their values (the mutable defaults are never modified).

    Returns:
        ``(-1, "wrong parameters")`` if ``target_url``, ``proxy_user`` or
        ``proxy_pwd`` is empty;
        ``(200, "name:value;name:value")`` with the session cookies on success;
        ``(-2, message)`` if anything fails while driving the browser.
    """
    if not target_url or not proxy_user or not proxy_pwd:
        return (-1, "wrong parameters")

    proxy_meta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": proxy_host,
        "port": proxy_port,
        "user": proxy_user,
        "pass": proxy_pwd,
    }
    proxies = {
        "httpProxy": proxy_meta,
        "noProxy": None,
        "proxyType": "MANUAL",
        "class": "org.openqa.selenium.Proxy",
        "autodetect": False,
    }

    # Stays None until the browser has started, so cleanup can tell whether
    # there is anything to shut down.
    driver = None
    try:
        options = Options()
        options.add_argument('accept="application/json"')
        options.add_argument(
            'referer="http://c.easygo.qq.com/eg_toc/map.html?origin="')
        options.add_argument(
            'user-agent="Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"'
        )
        options.add_argument('lang=zh_CN.UTF-8')
        desired_capabilities = options.to_capabilities()
        desired_capabilities['proxy'] = proxies
        # options.add_argument('--headless')
        # Headless mode was disabled at the Guangzhou branch; run directly
        # on Windows.
        # https://blog.csdn.net/vinson0526/article/details/51850929
        # This exact chromedriver.exe version from the same directory must be
        # used; otherwise a version error is reported.
        # https://get.geo.opera.com/pub/opera/desktop/58.0.3135.47/win/
        # https://github.com/operasoftware/operachromiumdriver/releases
        # this one is only for Windows NT, cannot for Win10 Home Edition
        driver = webdriver.Opera(desired_capabilities=desired_capabilities,
                                 options=options,
                                 executable_path="chromedriver.exe")
        driver.implicitly_wait(30)
        driver.get(
            "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"
        )

        # SECURITY NOTE(review): account credentials are hard-coded in source;
        # they should be moved to configuration or secret storage.
        qq_num = "1934267750"
        qq_passwd = "yu101472"
        driver.find_element_by_id("u").send_keys(qq_num)
        driver.find_element_by_id("p").send_keys(qq_passwd)
        driver.find_element_by_id("go").click()

        # Check whether a verification step (captcha) is shown; after a
        # successful login the title changes to: 宜出行
        if "手机统一登录" in driver.title:
            print(f"the qq number need to safe verify is {qq_num}")
            time.sleep(35)

        cookies = driver.get_cookies()

        # Serialize as "name:value" pairs separated by ";".
        cookies_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
        cookies_string = ";".join(
            f"{name}:{value}" for name, value in cookies_dict.items()
        ).strip(";")
        return (200, cookies_string)
    except Exception as ex:
        return (
            -2,
            f"Exception happens while crawling {target_url} Exception = {ex}"
        )
    finally:
        # quit() alone ends the session. The former quit()+close() pair always
        # raised on close(), and the handler then raised again, so the success
        # result was never returned.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # Best-effort cleanup: a shutdown failure must not replace
                # the result that is already being returned.
                pass
def get_options(self):
    """Return a newly constructed ``Options`` object with no settings applied."""
    fresh_options = Options()
    return fresh_options
import json  # manipulate JSON files
import time  # fixed delay while the page loads (was used without being imported)

from selenium import webdriver  # drive the browser for automated checks
# simulate clicks and typing
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select  # verify whether a tag is a 'select'
from selenium.webdriver.support.ui import WebDriverWait  # make the webdriver wait for page load
from selenium.webdriver.support import expected_conditions as ec  # verify that expected conditions hold
from selenium.webdriver.opera.options import Options  # opera webdriver options
from webdriver_manager.opera import OperaDriverManager  # opera webdriver manager
from xhtml2pdf import pisa  # build PDF from HTML

""" get HTML content """
url = "https://www.nba.com/stats/players/traditional/?sort=PLAYER_NAME&dir=-1"  # NBA table data URL

option = Options()
option.headless = True  # run without a graphical interface

driver = webdriver.Opera(
    options=option)  # start the browser through the webdriver
# Alternative: use the driver binary cached by the driver manager.
#driver = webdriver.Opera(OperaDriverManager().install(), options=option)

driver.get(url)
time.sleep(10)  # delay to let the data load

driver.find_element_by_xpath(
    "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']"
).click()  # emulate a click to sort by points
element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table")
html_content = element.get_attribute("outerHTML")  # HTML of the stats table

""" parse to BS4 HTML structure """
# Tail of a preceding snippet: `driver` and `sleep` are defined before this
# point, outside the visible source.
driver.get('https://whatismyipaddress.com')
sleep(10)
driver.quit()

# --- Variant: Opera driver started with a Chrome options object ---
from selenium import webdriver
from time import sleep
# The profile where I enabled the VPN previously using the GUI.
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir=chrome-data/")
driver = webdriver.Opera(executable_path='operadriver.exe', options=options)
driver.get('https://whatismyipaddress.com')
sleep(10)  # fixed pause before the browser is closed
driver.quit()

# --- Variant: Chrome with a packed extension loaded from a .zip file ---
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
options = Options()
options.add_argument("--user-data-dir=chrome-data/")
pluginfile = 'HMA-VPN-Proxy-Unblocker.zip'
options.add_extension(pluginfile)
driver = webdriver.Chrome(executable_path='chromedriver.exe', chrome_options=options)
# Open the extension vendor's login page.
driver.get(
    'https://my.hidemyass.com/?utm_medium=prg_link&utm_source=extension_chrome&utm_campaign=hma_mvp#login'
)
c = sheet.cell(row=row_number, column=column_number) c.value = data print(data) # Opening the excel file and reading the URL wb = xl.load_workbook('Web Scrapping Task..xlsx') sheet = wb['Sheet1'] row_value = sheet.cell(row=2, column=1) # print(row_value.value) url_value = row_value.value.split(":", 1)[1] # print(url_value) # Using opera driver for automation # Running in headless mode opt = Options() opt.add_argument("--headless") driver = webdriver.Opera( executable_path='C:\\Users\\hp\\Downloads\\Driver For ' 'Automation\\operadriver_win64\\operadriver_win64\\operadriver.exe') # Opening the site in full screen mode driver.maximize_window() driver.get(url_value) # Wait for Page to Load time.sleep(30) # Login to facebook driver.find_element_by_xpath( "(//*[@class='m9osqain jq4qci2q a3bd9o3v'])[1]").click()
def options():
    """Provide a new ``Options`` object with default settings."""
    instance = Options()
    return instance
def driver_factory(browser_name: str,
                   is_headless: bool = False,
                   webdriver_logging: bool = False) -> webdriver:
    """Build a WebDriver for ``browser_name``.

    Supported names are "firefox", "chrome" and "opera". "yandex" and
    "safari" are recognised but not implemented yet and yield ``None``.

    Args:
        browser_name: which browser to start.
        is_headless: add the ``--headless`` argument to the browser options.
        webdriver_logging: wrap the driver in an ``EventFiringWebDriver``
            whose listener logs to ``"<browser_name>.log"``.

    Raises:
        NameError: for any other browser name.
    """
    if browser_name in ("yandex", "safari"):
        # TODO: implement Yandex browser / Safari browser
        return None
    if browser_name not in ("firefox", "chrome", "opera"):
        raise NameError("Browser not supported")

    if browser_name == "firefox":
        firefox_options = webdriver.FirefoxOptions()
        if is_headless:
            firefox_options.add_argument("--headless")
        plain_driver = webdriver.Firefox(options=firefox_options)
    elif browser_name == "chrome":
        chrome_options = webdriver.ChromeOptions()
        if is_headless:
            chrome_options.add_argument("--headless")
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['acceptSslCerts'] = True
        capabilities['acceptInsecureCerts'] = True
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        plain_driver = webdriver.Chrome(options=chrome_options,
                                        desired_capabilities=capabilities)
    else:  # opera
        opera_options = OperaOptions()
        opera_options.binary_location = OPERA_BROWSER_EXECUTABLE
        if is_headless:
            opera_options.add_argument("--headless")
        capabilities = DesiredCapabilities.OPERA.copy()
        capabilities['acceptSslCerts'] = True
        capabilities['acceptInsecureCerts'] = True
        plain_driver = webdriver.Opera(
            options=opera_options,
            executable_path=OPERA_WEBDRIVER_EXECUTABLE,
            desired_capabilities=capabilities)

    if not webdriver_logging:
        return plain_driver

    # Same wrapping for every browser: events go to a per-browser log file.
    return EventFiringWebDriver(
        driver=plain_driver,
        event_listener=DriverEventListener(
            log_filename=f"{browser_name}.log"))
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome import service
from selenium.webdriver.opera.options import Options

options = Options()
# NOTE(review): placeholder paths; this one is assigned as the browser binary
# although it is named like a driver path - confirm.
options.binary_location = "path_to_driver"

webdriver_service = service.Service("path_to_driver")
webdriver_service.start()
try:
    driver = webdriver.Opera(options=options,
                             executable_path="path_to_.exe_file")
    try:
        driver.get('http://www.google.com')
        sleep(2)
    finally:
        driver.quit()
finally:
    # The service started above was previously left running.
    webdriver_service.stop()
def setup_method(self, method):
    """Start the browser named by the ``BROWSER`` environment variable.

    "firefox" and "opera" select those browsers; any other value (including
    an unset variable) starts Chrome. The driver is stored in ``self.driver``
    and its window is maximized. Mobile emulation is only applied for Chrome,
    when ``method`` has a ``portable`` attribute.
    """
    self.assert_errors = '\n'
    browser_name = os.environ.get('BROWSER')

    # Switches shared by the two Chromium-based browsers. "--enable-logging"
    # and "--v=1" are separate switches; they used to be passed glued
    # together as one argument.
    chromium_arguments = (
        "--verbose",
        "--enable-logging",
        "--v=1",
        "--no-sandbox",
        "--ignore-certificate-errors",
        "--disable-notifications",
        "--disable-gpu",
    )

    if browser_name == 'firefox':
        profile = webdriver.FirefoxProfile()
        profile.set_preference("devtools.console.stdout.content", True)
        firefox_options = FirefoxOptions()
        firefox_options.add_argument("-no-remote")
        firefox_options.log.level = 'trace'
        self.driver = webdriver.Firefox(firefox_profile=profile,
                                        options=firefox_options)
    elif browser_name == 'opera':
        opera_options = OperaOptions()
        for argument in chromium_arguments:
            opera_options.add_argument(argument)
        opera_options.add_experimental_option('w3c', False)
        opera_options.binary_location = '/usr/bin/opera'
        self.driver = webdriver.Opera(options=opera_options)
    else:
        options = ChromeOptions()
        for argument in chromium_arguments:
            options.add_argument(argument)
        options.add_experimental_option('w3c', False)
        if hasattr(method, 'portable'):
            options.add_experimental_option("mobileEmulation",
                                            {'deviceName': 'Nexus 5'})
        # `options=` replaces the deprecated `chrome_options=` keyword and
        # matches the other branches.
        self.driver = webdriver.Chrome(options=options)

    self.driver.maximize_window()
from selenium import webdriver from selenium.webdriver.opera.options import Options import time import requests import os import json # required to do if opera is not installed in default loacation options = Options() options.binary_location = "A:\\Opera\\launcher.exe" options.add_argument("--remote-debugging-port=9222") # driver initiliazation driver_path = webdriver.Opera( executable_path= "C:\\Users\\nouamane\\Downloads\\Programms\\operadriver.exe", options=options) def play_video(): # global youtube watch url watch_on_yotube = 'http://www.youtube.com/watch?v=' api_key = os.environ.get( 'TUBE_KEY') # get it from google developer api key channel_id = 'UCwO_xoYm2vjhu4kZSxFO5mA' # your favorite youtuber id channel base_url = 'https://www.googleapis.com/youtube/v3/search?' # optional parameters to be passed to the url
def _set_driver(self):
    """Create ``self.driver`` for the browser named in ``self.browser``.

    Handles 'opera', 'chrome', 'ie', 'mozilla' and 'edge'. For most browsers
    the driver is first started without an explicit path; if that raises
    ``WebDriverException`` it is retried with the path returned by
    ``self._get_driver_path()``.
    """

    def start_with_fallback(factory, driver_label):
        # Try the default lookup first, then the explicit driver path.
        try:
            self.driver = factory()
        except WebDriverException as e:
            print('\nPlease note:', e.msg)
            path = self._get_driver_path()
            print("Trying to look for a '{}' under:\n{}".format(driver_label, path))
            self.driver = factory(executable_path=path)

    if self.browser == 'opera':
        options = Options()
        options.binary_location = DriverPath.OPERA_BINARY_PATH
        print('\nBinary Path: {}'.format(DriverPath.OPERA_BINARY_PATH))
        try:
            self.driver = webdriver.Opera(options=options)
        except SessionNotCreatedException as e:
            # Reported and re-raised: no retry for this failure.
            print('\nSessionNotCreatedException:', e.msg)
            raise
        except WebDriverException as e:
            print('\nWebDriverException:', e.msg)
            path = self._get_driver_path()
            print("Trying to look for a 'operadriver' under:\n{}".format(path))
            self.driver = webdriver.Opera(options=options, executable_path=path)

    if self.browser == 'chrome':
        start_with_fallback(webdriver.Chrome, 'chromedriver')

    if self.browser == 'ie':
        start_with_fallback(webdriver.Ie, 'IEDriverServer')

    if self.browser == 'mozilla':
        start_with_fallback(webdriver.Firefox, 'geckodriver')

    if self.browser == 'edge':
        # Purpose: Probe the underlying platform's hardware, operating system,
        # and interpreter version information.
        # print('Version tuple:', platform.python_version_tuple())
        # print('Compiler :', platform.python_compiler())
        # print('Build :', platform.python_build())
        version_digit_sum = sum(int(i) for i in platform.python_version_tuple())
        if version_digit_sum > 13:
            print('\nVersion:', platform.python_version())
            print('WebDriver is now a Feature On Demand')
            info_url = ('https://blogs.windows.com/msedgedev/2018/06/14/'
                        'webdriver-w3c-recommendation-feature-on-demand/#Rg8g2hRfjBQQVRXy.97\n')
            print('For more info please check: {}'.format(info_url))
            self.driver = webdriver.Edge()
        else:
            path = self._get_driver_path()
            print("Trying to look for a 'MicrosoftWebDriver' under:\n{}".format(path))
            self.driver = webdriver.Edge(executable_path=path)
def driver(request):
    """Start a remote or local WebDriver from pytest options and return it.

    Options read from ``request.config``: ``--browser``, ``--bversion``,
    ``--executor``, ``--vnc``, ``--logs``, ``--videos``, ``--mobile`` and
    ``--is_remote``.

    With ``--is_remote`` the driver is a ``Remote`` session on
    ``http://<executor>:4444/wd/hub``; otherwise a local Firefox, Opera or
    Chrome is started. The driver is wrapped in an ``EventFiringWebDriver``,
    its capabilities are attached to the allure report, and
    ``allure-report/environment.xml`` is written.

    Raises:
        ValueError: for an unsupported local browser name.
    """
    import os  # local import: only needed to create the report directory

    logger = logging.getLogger('BrowserLogger')
    browser = request.config.getoption("--browser")
    version = request.config.getoption("--bversion")
    executor = request.config.getoption("--executor")
    vnc = request.config.getoption("--vnc")
    logs = request.config.getoption("--logs")
    videos = request.config.getoption("--videos")
    executor_url = f"http://{executor}:4444/wd/hub"
    mobile = request.config.getoption("--mobile")
    is_remote = request.config.getoption("--is_remote")

    caps = {
        "browserName": browser,
        "browserVersion": version,
        "screenResolution": "1280x720",
        "name": "Tester",
        "selenoid:options": {
            "enableVNC": vnc,
            "enableVideo": videos,
            "enableLog": logs,
        },
        'acceptSslCerts': True,
        'acceptInsecureCerts': True,
        'timeZone': 'Europe/Moscow',
        'goog:chromeOptions': {
            'args': []
        }
    }
    mobile_emulation = {"deviceName": "iPhone 5/SE"}

    if is_remote:
        if browser == "chrome" and mobile:
            caps["goog:chromeOptions"]["mobileEmulation"] = mobile_emulation
        driver = wd.Remote(
            command_executor=executor_url,
            desired_capabilities=caps
        )
    else:
        if browser == "firefox":
            firefox_options = FirefoxOptions()
            firefox_options.headless = True
            driver = wd.Firefox(options=firefox_options)
        elif browser == "opera":
            opera_options = OperaOptions()
            opera_options.add_argument("headless")
            driver = wd.Opera(options=opera_options)
        elif browser == "chrome":
            chrome_options = ChromeOptions()
            # chrome_options.add_argument("headless")
            if mobile:
                chrome_options.add_experimental_option("mobileEmulation",
                                                       mobile_emulation)
            driver = wd.Chrome(options=chrome_options)
        else:
            raise ValueError(f"{browser} browser not supported")

    def teardown():
        # Reads `driver` when it runs, so it quits the wrapped driver that is
        # assigned further down.
        driver.quit()
        logger.info(f"Browser {browser} closed")

    # Register cleanup as soon as a browser exists, so a failure in the
    # reporting steps below no longer leaves it running.
    request.addfinalizer(teardown)

    if not mobile:
        driver.maximize_window()
    logger.info(f"Browser {browser} started.")

    driver = EventFiringWebDriver(driver, MyListener())

    # Attach browser data
    allure.attach(
        name=driver.session_id,
        body=json.dumps(driver.desired_capabilities),
        attachment_type=allure.attachment_type.JSON)

    # Add environment info to allure-report; create the directory first so a
    # fresh checkout does not fail on open().
    os.makedirs("allure-report", exist_ok=True)
    with open("allure-report/environment.xml", "w+") as file:
        file.write(
            f"<environment> "
            f"<parameter> <key>Browser</key> <value>{browser}</value> </parameter> "
            f"<parameter> <key>Browser.Version</key> <value>{version}</value> </parameter> "
            f"<parameter> <key>Executor</key> <value>{executor}</value> </parameter> "
            f"</environment> ")

    return driver
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.opera.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# options
options = Options()
options.add_argument("--remote-debugging-port=9222")
#options.add_argument("--headless")

opera_driver = webdriver.Opera(executable_path="operadriver", options=options)
opera_driver.get(
    "https://kahoot.it/challenge/09168452?challenge-id=3882dbfd-4f85-4e86-9fa2-018882d19a9a_1605679445775"
)

delay = 20  # seconds to wait for the nickname field
nickname = "RaymundoPulido"
user = None
try:
    user = WebDriverWait(opera_driver, delay).until(
        EC.presence_of_element_located((By.ID, "nickname")))
except TimeoutException:
    print("Error al abrir el inicio de Seccion")

# Identity check: `user` is either None or the located element.
if user is not None:
    time.sleep(1)
self.items.delete_many({}) def insert(self, entity): self.items.insert_one(entity) def head(self, num): objects = self.items.find().limit(num) for item in objects: print(item) def contains(self, item): return self.items.find(item).count() if __name__ == '__main__': opera_options = Options() opera_options.add_argument('--start-maximized') driver = webdriver.Opera(options=opera_options) mongo = Mongo(blank=True) try: links = get_pages(driver) for link in links: data = get_data(link) if not mongo.contains(data): mongo.insert(data) driver.quit() except Exception as e: print(e) driver.quit()
#https://bidplus.gem.gov.in/advance-search?state_name=DELHI&city_name=CENTRAL+DELHI&from_date=18-04-2021&to_date=30-04-2021&searchlocation=Search from selenium import webdriver import os import datetime from datetime import timedelta from selenium.webdriver.opera.options import Options dirname = os.getcwd() OPERADRIVER_PATH = os.path.join(dirname, "media/operadriver") PREFPATH = os.path.join(dirname, "media/") prefs = {"download.default_directory": PREFPATH, "directory_upgrade": True} opts = Options() opts.add_experimental_option("prefs", prefs) driver = webdriver.Opera(executable_path=OPERADRIVER_PATH, options=opts) def find_and_download(state, city, start_date, end_date): flag = driver.find_elements_by_xpath( "//strong[contains(text(), 'Empty!')]") url = f"https://bidplus.gem.gov.in/advance-search?state_name={state}&city_name={city}&from_date={start_date}&to_date={end_date}&searchlocation=Search&page_no=1" driver.get(url) pageno = 1 while (flag == []): url = f"https://bidplus.gem.gov.in/advance-search?state_name={state}&city_name={city}&from_date={start_date}&to_date={end_date}&searchlocation=Search&page_no={pageno}" driver.get(url) items = driver.find_elements_by_xpath(
def test_opera_options_is_deprecated(options):
    """Constructing ``Options`` must emit the expected ``DeprecationWarning`` text."""
    expected = (
        "<class 'selenium.webdriver.opera.options.Options'> is deprecated and will be removed in 4.3; "
        "see: https://www.selenium.dev/documentation/webdriver/getting_started/open_browser/#opera"
    )

    with pytest.warns(DeprecationWarning) as captured:
        Options()

    first_warning = captured[0]
    assert first_warning.message.args[0] == expected
from selenium.webdriver.opera.options import Options
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import json

# 1. Get the HTML content from the URL
url = "https://stats.nba.com/players/traditional/?PerMode=Totals&Season=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1"
local = r"C:\Users\Usuario\Documents\operadriver.exe"  # raw string so the backslashes in the path stay literal
option = Options()  # instantiate the Options class
option.headless = False  # headless controls whether the browser window is hidden
driver = webdriver.Opera(executable_path=local, options=option)
driver.get(url)
time.sleep(10)  # fixed wait for the page to load
driver.find_element_by_xpath(
    "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']"
).click()  # locate the PTS column header and click it
element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table")
html_content = element.get_attribute('outerHTML')

# 2. Parse the HTML content - BeautifulSoup
# Analyses the HTML and turns it into structured data
soup = BeautifulSoup(html_content, 'html.parser')
table = soup.find(name='table')

# 3. Structure the content in a DataFrame - Pandas
def translate(src_lang, tgt_lang, list_text):
    """Send the strings in ``list_text`` to the Google Translate page in batches.

    Texts are accumulated into one newline-separated string, typed into the
    page's textarea, and the result of ``getTranslateContent`` is passed to
    ``saveFile`` under the global ``translate_file_name``.

    NOTE(review): the exact nesting of the batching branches was reconstructed
    from a flattened source - confirm against the original file.

    Args:
        src_lang: value used for the ``sl`` URL parameter.
        tgt_lang: value used for the ``tl`` URL parameter.
        list_text: iterable of strings to translate.
    """
    global translate_file_name
    list_tgt = list()  # source texts that make up the current batch
    count = 1
    search_text = ""
    sign = ";"
    #f = open("link.test.txt", "w", encoding="utf-8")
    loop = 1  # number of batches sent with the current browser instance
    opera_profile = "C:\\Users\\\Admin\\AppData\\Roaming\\Opera Software\\Opera Stable"
    # These Opera options are configured, but the driver actually used below
    # comes from ChromeDriver.getChromeDriver().
    options = Options()
    options.binary_location = r'C:\Users\\Admin\AppData\Local\Programs\Opera\74.0.3911.107\opera.exe'
    options.add_argument("--headless")
    options.add_argument('--no-sandbox')
    #options.add_argument('--user-data-dir=' + opera_profile)
    #driver = webdriver.PhantomJS(os.path.abspath(os.getcwd()+"\phantomjs"))
    #driver = webdriver.Opera(executable_path=os.path.abspath(os.getcwd()+"/operadriver.exe"), options = options)
    driver = ChromeDriver.getChromeDriver()
    url = "https://translate.google.com/?sl={}&tl={}&text={}&op=translate".format(
        src_lang, tgt_lang, "")
    driver.get(url)
    textarea = driver.find_element_by_tag_name("textarea")
    old_text = ""
    print(len(list_text))
    len_of_batch = 0  # characters accumulated in the current batch
    lim_of_batch = 3800  # flush once the batch has grown past this size
    text_to_translate = ""
    list_transed_text = list()
    try:
        for text in list_text:
            # Restart the browser once the batch counter reaches a multiple
            # of 100.
            if (loop % 100 == 0):
                driver.close()
                driver = ChromeDriver.getChromeDriver()
                url = "https://translate.google.com/?sl={}&tl={}&text={}&op=translate".format(
                    src_lang, tgt_lang, "")
                driver.get(url)
                textarea = driver.find_element_by_tag_name("textarea")
                loop = 1
            textarea = driver.find_element_by_tag_name("textarea")
            if (len_of_batch > lim_of_batch):
                # Batch is over the limit: drop the trailing " \n" and send it.
                last_index = len(text_to_translate) - 1
                if (text_to_translate[last_index] == "\n"):
                    text_to_translate = text_to_translate[0:last_index - 1]
                textarea.send_keys(text_to_translate)
                list_transed_text = getTranslateContent(driver, list_tgt)
                textarea.clear()
                list_tgt = list()
                len_of_batch = 0
                text_to_translate = ""
                loop = loop + 1
                time.sleep(0.5)
            else:
                text_len = len(text)
                # Adding this text would exceed 4000 characters: send the
                # current batch first.
                if (text_len + len_of_batch > 4000):
                    last_index = len(text_to_translate) - 1
                    if (text_to_translate[last_index] == "\n"):
                        text_to_translate = text_to_translate[0:last_index - 1]
                    textarea.send_keys(text_to_translate)
                    list_transed_text = getTranslateContent(driver, list_tgt)
                    textarea.clear()
                    len_of_batch = 0
                    list_tgt = list()
                    loop = loop + 1
                    text_to_translate = ""
                    time.sleep(0.5)
                len_of_batch = len_of_batch + text_len
                text = bytes(text, "utf-8").decode('utf-8', 'ignore')
                text = text.replace("\r", "")
                # Skip texts containing characters outside the Basic
                # Multilingual Plane.
                not_bmp = x = re.search(u'[\U00010000-\U0010ffff]', text)
                if not_bmp:
                    continue
                text_to_translate = text_to_translate + text + " \n"
                list_tgt.append(text)
            # Save whatever the last flush produced, then reset the buffer.
            if list_transed_text:
                saveFile(translate_file_name, list_transed_text)
                list_transed_text = list()
    except Exception as e:
        # Any failure ends the loop; it is printed and processing continues
        # with the final flush below.
        print(e)
        traceback.print_exc()
        time.sleep(3)
        #pdb.set_trace()
        pass
    #f.close()
    # Send the last, partially filled batch.
    if (len_of_batch > 0):
        textarea.send_keys(text_to_translate)
        list_transed_text = getTranslateContent(driver, list_tgt)
    if list_transed_text:
        if (len(list_transed_text) > 0):
            saveFile(translate_file_name, list_transed_text)
    driver.close()
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2866.71 Safari/537.36", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2919.83 Safari/537.36", "Mozilla/5.0 (X11; Ubuntu; Linux i686 on x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2820.59 Safari/537.36" ] headers = { 'User-Agent': random.choice(uastrings), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip', 'DNT': '1' } options = Options() options.headless = False options.add_argument("--incognito") driver = webdriver.Opera(options=options, executable_path=DRIVER_PATH) url_temp = "https://tuoitre.vn/" + 'the-gioi' + "/trang-" + str(100 - 1) + ".htm" driver.get(url_temp) driver.implicitly_wait(30) click = 1 try: while click < 1000: try: driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 10).until(
import json
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.opera.options import Options

# 1 - Get the HTML content
url = "https://covid.saude.gov.br"
operadrive = r"C:\Users\Usuario\Documents\operadriver.exe"

option = Options()
option.headless = True
drive = webdriver.Opera(executable_path=operadrive, options=option)

# Quit the browser even if a lookup or the parsing below fails.
try:
    drive.get(url)  # perform the request
    time.sleep(10)  # fixed wait for the page to load

    element = drive.find_element_by_xpath(
        "/html/body/app-root/ion-app/ion-router-outlet/app-home/ion-content/painel-geral-component/div/div[1]/div/div[1]"
    )
    tabelaHTML = element.get_attribute('outerHTML')

    # 2 - Parse the HTML
    soup = BeautifulSoup(tabelaHTML, 'html.parser')
    table = soup.find('lista-sanfona-component')

    # 3 - Build the dataframe
    df_full = pd.read_html(str(table))[0].head()
    print(df_full)
finally:
    drive.quit()