def startMobileDriver(self):
    """Start the Firefox driver used for the mobile session.

    Builds the Firefox profile (the default profile directory when
    ``self.loadDefaultProfile`` is set, otherwise a fresh one if a user-agent
    override has to be stored), launches Firefox with the configured headless
    setting and stores the driver on ``self.firefoxMobileDriver``.  When
    cookie loading is enabled the stored cookies are replayed into the new
    session; cookies rejected for their domain are skipped.
    """
    options = Options()
    options.set_headless(self.useHeadless)

    if self.loadDefaultProfile == True:
        firefoxMobileProfile = webdriver.FirefoxProfile(
            profile_directory=self.ffProfileDir)
    elif self.mobileUA is not None:
        # A profile object is needed to carry the user-agent preference;
        # previously this path called set_preference on None and crashed.
        firefoxMobileProfile = webdriver.FirefoxProfile()
    else:
        firefoxMobileProfile = None

    if self.mobileUA is not None:
        firefoxMobileProfile.set_preference(
            "general.useragent.override", self.mobileUA)

    self.firefoxMobileDriver = webdriver.Firefox(
        firefox_profile=firefoxMobileProfile,
        executable_path=self.driverBinary,
        firefox_options=options)
    self.mobileRunning = True

    if self.loadCookies == True and self.cookies is not None:
        self.firefoxMobileDriver.delete_all_cookies()
        # Loading cookies only works if we are at a site the cookie would
        # work for.
        self.getMobileUrl("https://login.live.com")
        for cookie in self.cookies:
            try:
                self.firefoxMobileDriver.add_cookie(cookie)
            except selenium.common.exceptions.InvalidCookieDomainException:
                # Cookie belongs to another domain: skip it and keep going.
                continue
def initialize_browser(for_scenario_2=False):
    """Create and return a Firefox WebDriver for the test run.

    :param for_scenario_2: when true, a custom profile is used so that files
        offered by a page are downloaded automatically, without a prompt
        (Selenium cannot control download dialogs).
    :returns: the started browser with its implicit wait configured from
        ``settings.WAIT_TIME_BETWEEN_EACH_STEP`` (seconds).
    """
    positional = ()
    keywords = {}

    if for_scenario_2:
        # Test Scenario 2 requires users to download things from their browser.
        download_profile = webdriver.FirefoxProfile()
        # folderList: 0 = desktop, 1 = Downloads folder, 2 = the location
        # specified for the most recent download is used again.
        download_profile.set_preference('browser.download.folderList', 2)
        download_profile.set_preference(
            'browser.download.manager.showWhenStarting', False)
        download_profile.set_preference(
            'browser.download.dir', settings.BROWSER_DOWNLOAD_FOLDER)
        auto_saved_mime_types = ['text/plain', 'application/json']
        download_profile.set_preference(
            'browser.helperApps.neverAsk.saveToDisk',
            ';'.join(auto_saved_mime_types))
        positional = (download_profile,)

    if settings.USE_HEADLESS_BROWSER:
        from selenium.webdriver.firefox.options import Options
        headless_options = Options()
        headless_options.add_argument("--headless")
        headless_options.log.level = "trace"
        keywords['options'] = headless_options

    browser = webdriver.Firefox(*positional, **keywords)

    # browser.maximize_window()  # FIXME: When enabled, some clicks are not triggered anymore
    browser.implicitly_wait(settings.WAIT_TIME_BETWEEN_EACH_STEP)  # In seconds
    return browser
def get_driver(self, force=False):
    """Return the Selenium driver, creating it on first use.

    :param force: when true, ``self.close()`` is called first so that a new
        driver is created (assumes ``close`` resets ``self.driver`` to a
        falsy value -- TODO confirm).
    :returns: ``self.driver``
    """
    if force:
        # Forced re-creation: close the current driver first.
        self.close()

    if not self.driver:
        # Headless Firefox initialisation.
        # Spoof the user agent through a profile preference.
        profile = webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", self.user_agent)

        # Headless mode.
        options = Options()
        options.add_argument("--headless")

        # Launch.
        self.driver = webdriver.Firefox(profile, firefox_options=options)

        # The former PhantomJS initialisation is disabled; the capabilities
        # dictionary that was still being built for it has been removed.

        # Implicit wait.
        self.driver.implicitly_wait(15)  # seconds

    return self.driver
def getCDMStatusPage(tid_crm):
    """Return the HTML of the CDM monitor search result for a terminal id.

    Opens the monitor in headless Firefox, selects the ``GUEST`` radio
    button and submits, then opens the ``search_tid`` module, types
    ``tid_crm`` into the ``_termid_`` field and submits.

    :param tid_crm: terminal id typed into the search form.
    :returns: page source of the result page.
    """
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    options = Options()
    options.add_argument("--headless")
    # Firefox runs headless: no browser window is shown.
    browser = webdriver.Firefox(firefox_options=options)

    link = 'http://172.18.65.42/monitorcdm/'
    try:
        browser.get(link)
        browser.find_elements_by_css_selector(
            "input[type='radio'][value='GUEST']")[0].click()
        browser.find_element_by_class_name('tbutton').click()
        browser.get(link)
        browser.get('http://172.18.65.42/monitorcdm/?_module_=search_tid')
        form_textfield = browser.find_element_by_name('_termid_')
        form_textfield.send_keys(tid_crm)
        browser.find_element_by_class_name('tbutton').click()
        return browser.page_source
    finally:
        # Always shut the browser down, even when a lookup above fails.
        browser.quit()
def start_driver(self, browser_type, capabilities, config_section=None):
    """
    Create the selenium webdriver for a local browser.

    :param browser_type: type of browser for which to prepare the driver
    :param capabilities: capabilities used for webdriver initialization
    :param config_section: forwarded to ``get_browser_profile`` and
        ``get_browser_arguments``
    :returns: the started driver
    :raises ValueError: if ``browser_type`` is not one of the handled types
    """
    # The profile is resolved up front for every browser type.
    profile = self.get_browser_profile(browser_type, capabilities, config_section)

    if browser_type == "firefox":
        from selenium.webdriver.firefox.options import Options
        ff_options = Options()
        for argument in self.get_browser_arguments(config_section):
            ff_options.add_argument(argument)
        return webdriver.Firefox(profile, desired_capabilities=capabilities,
                                 firefox_options=ff_options)

    if browser_type == "chrome":
        return webdriver.Chrome(desired_capabilities=capabilities,
                                chrome_options=profile)

    if browser_type == "ie":
        return webdriver.Ie(capabilities=capabilities)

    if browser_type == "phantomjs":
        return webdriver.PhantomJS(desired_capabilities=capabilities)

    if browser_type == "opera":
        return webdriver.Opera(desired_capabilities=capabilities)

    # SafariDriver bindings for Python are not handled here.
    raise ValueError('Unknown type of browser.')
def open(self):
    '''
    Start a headless Firefox driver that uses the configured profile.

    In order to have selenium working with Firefox and be able to get
    SAP Notes from launchpad.support.sap.com you must:

    1. Use a browser certificate (SAP Passport) in order to avoid
       renewed logons. You can apply for it at:
       https://support.sap.com/support-programs-services/about/getting-started/passport.html
    2. Get the certificate and import it into Firefox.
       Open menu -> Preferences -> Advanced -> View Certificates
       -> Your Certificates -> Import
    3. Trust this certificate (auto select).
    4. Check it. Visit some SAP Note url in Launchpad.
       No credentials will be asked.
       Launchpad must load the target page successfully.

    Returns the driver, or None when Firefox could not be started (the
    error is logged).
    '''
    driver = None
    utils = self.get_service('Utils')
    options = Options()
    options.add_argument('--headless')
    FIREFOX_PROFILE_DIR = utils.get_firefox_profile_dir()
    FIREFOX_PROFILE = webdriver.FirefoxProfile(FIREFOX_PROFILE_DIR)
    try:
        driver = webdriver.Firefox(firefox_profile=FIREFOX_PROFILE,
                                   firefox_options=options)
    except Exception as error:
        self.log.error(error)
        # Geckodriver not found
        # Download it from:
        # https://github.com/mozilla/geckodriver/releases/latest
    else:
        # Only report success when the driver was actually created.
        self.log.debug("Webdriver initialited")
    return driver
def _setup_firefox(self, capabilities):
    """Create a local Firefox webdriver.

    :param capabilities: capabilities object
    :returns: a new local Firefox driver
    """
    gecko_driver = None
    if capabilities.get("marionette"):
        gecko_driver = self.config.get('Driver', 'gecko_driver_path')
        self.logger.debug("Gecko driver path given in properties: %s", gecko_driver)

    # Optional custom Firefox binary
    binary_path = self.config.get_optional('Firefox', 'binary')

    options = Options()
    if self.config.getboolean_optional('Driver', 'headless'):
        self.logger.debug("Running Firefox in headless mode")
        options.add_argument('-headless')
    self._add_firefox_arguments(options)
    if binary_path:
        options.binary = binary_path

    geckodriver_log = os.path.join(DriverWrappersPool.output_directory, 'geckodriver.log')

    def launch(**extra):
        # A profile is created for each attempt, as part of the call itself.
        return webdriver.Firefox(firefox_profile=self._create_firefox_profile(),
                                 capabilities=capabilities,
                                 executable_path=gecko_driver,
                                 firefox_options=options,
                                 **extra)

    try:
        # Selenium 3 accepts log_path
        return launch(log_path=geckodriver_log)
    except TypeError:
        # Selenium 2: retry without log_path
        return launch()
def setUp(self):
    """Create the superuser, load the fixture row and start the browser."""
    User.objects.create_superuser(self.username, '*****@*****.**', self.password)
    self.existing = TestModel.objects.get(pk=1)

    # Instantiating the WebDriver will load your browser
    browser_options = Options()
    if settings.HEADLESS_TESTING:
        browser_options.add_argument("--headless")
    self.webdriver = CustomWebDriver(firefox_options=browser_options)
def load_driver():
    """Start Firefox in headless mode and return the driver."""
    headless = Options()
    headless.add_argument("--headless")
    return webdriver.Firefox(firefox_options=headless)
def setUp(self):
    """Pick the driver for this test run and set a 10 second implicit wait.

    On CI the driver comes from ``sauce_chrome_webdriver``; otherwise, when
    ``settings.SELENIUM`` is exactly ``True``, a headless Firefox is started.
    """
    if _CI:
        self.driver = self.sauce_chrome_webdriver()
    elif settings.SELENIUM is True:
        headless = FirefoxOptions()
        headless.add_argument('-headless')
        self.driver = Firefox(firefox_options=headless)

    self.driver.implicitly_wait(10)
def setUpClass(cls):
    """Start one headless browser shared by the tests of the class."""
    super(LiveTestCase, cls).setUpClass()

    headless = Options()
    headless.headless = True

    browser = WebDriver(options=headless)
    cls.selenium = browser
    browser.implicitly_wait(10)
def setUp(self):
    """Start one headless Firefox and one headless Chrome driver."""
    # Firefox
    ff_opts = OptionsFF()
    ff_opts.add_argument('-headless')
    self.firefox_driver = webdriver.Firefox(firefox_options=ff_opts)

    # Chrome
    chrome_opts = OptionsChrom()
    chrome_opts.add_argument('-headless')
    self.chrome_driver = webdriver.Chrome(chrome_options=chrome_opts)
def test_arguments(self):
    """Arguments start empty and grow via add_argument and direct append."""
    options = Options()
    assert len(options.arguments) == 0

    options.add_argument("--foo")
    assert len(options.arguments) == 1

    # The exposed list can also be appended to directly.
    options.arguments.append("--bar")
    assert len(options.arguments) == 2
    assert options.arguments == ["--foo", "--bar"]
def test_rendering_utf8_iframe():
    """An IFrame holding non-ASCII text renders that text in the browser."""
    iframe = elem.IFrame(html=u'<p>Cerrahpaşa Tıp Fakültesi</p>')

    options = Options()
    options.add_argument('-headless')
    driver = Firefox(options=options)
    try:
        driver.get('data:text/html,' + iframe.render())
        driver.switch_to.frame(0)
        assert u'Cerrahpaşa Tıp Fakültesi' in driver.page_source
    finally:
        # Shut Firefox down whether or not the assertion holds; previously
        # the browser process was left running after the test.
        driver.quit()
def setUp(self):
    """Open REDbot headless, submit ``self.test_uri`` and check completion."""
    headless = Options()
    headless.add_argument('-headless')
    self.browser = webdriver.Firefox(options=headless)
    self.browser.get(redbot_uri)

    # Fill in and submit the URI field.
    uri_field = self.browser.find_element_by_id("uri")
    self.uri = uri_field
    uri_field.send_keys(self.test_uri)
    uri_field.submit()

    # Fixed pause before checking for completion.
    time.sleep(2.0)
    self.check_complete()
def test_to_capabilities(self):
    """to_capabilities exposes profile, args and binary under moz:firefoxOptions."""
    key = "moz:firefoxOptions"
    options = Options()
    assert options.to_capabilities() == {}

    # Profile is serialised to its encoded string form.
    ff_profile = FirefoxProfile()
    options.profile = ff_profile
    capabilities = options.to_capabilities()
    assert key in capabilities
    assert "profile" in capabilities[key]
    assert isinstance(capabilities[key]["profile"], basestring)
    assert capabilities[key]["profile"] == ff_profile.encoded

    # Arguments are passed through as a list.
    options.add_argument("--foo")
    capabilities = options.to_capabilities()
    assert key in capabilities
    assert "args" in capabilities[key]
    assert capabilities[key]["args"] == ["--foo"]

    # Binary is serialised to its start command.
    ff_binary = FirefoxBinary()
    options.binary = ff_binary
    capabilities = options.to_capabilities()
    assert key in capabilities
    assert "binary" in capabilities[key]
    assert isinstance(capabilities[key]["binary"], basestring)
    assert capabilities[key]["binary"] == ff_binary._start_cmd
def test_profile(self, tmpdir_factory):
    """The profile defaults to None and accepts a FirefoxProfile or a path."""
    options = Options()
    assert options.profile is None

    replacement = FirefoxProfile()
    assert replacement != options.profile
    options.profile = replacement
    assert replacement == options.profile

    # Assigning a directory path yields a FirefoxProfile instance.
    profile_dir = str(tmpdir_factory.mktemp("profile"))
    options.profile = profile_dir
    assert isinstance(options.profile, FirefoxProfile)
def new_instance(self):
    """
    Start a new Firefox web driver (headless when ``self.headless`` is set),
    size the window to 1024x768, set a 5 second script timeout and return
    the driver for further processing.
    """
    firefox_options = Options()
    if self.headless:
        print_debug(self.debug, 'actiating headless mode')
        firefox_options.add_argument('-headless')

    browser = webdriver.Firefox(firefox_options=firefox_options)
    browser.set_window_size(1024, 768)
    browser.set_script_timeout(5)
    return browser
def driver_kwargs(request, driver_kwargs, profile):
    """Inject ``profile`` into the driver keyword arguments.

    ``request.param`` selects how the profile is supplied: inside the
    ``moz:firefoxOptions`` capability, as ``firefox_profile``, or via an
    ``Options`` object stored under ``firefox_options``.  The
    ``firefox_profile`` entry is set in every case.
    """
    mode = request.param

    if mode == 'capabilities':
        driver_kwargs[mode].setdefault('moz:firefoxOptions', {'profile': profile})
    elif mode == 'firefox_profile':
        driver_kwargs[mode] = profile
    elif mode == 'firefox_options':
        ff_options = Options()
        ff_options.profile = profile
        driver_kwargs[mode] = ff_options

    driver_kwargs['firefox_profile'] = profile
    return driver_kwargs
def redeem(codes, headless=True):
    """Redeem ``codes`` on marvel.com through Firefox.

    Best effort: any failure is logged (with traceback) and swallowed, so
    the function never raises for ordinary errors.

    :param codes: iterable of code strings, joined with ``,`` in the log
        message on failure.
    :param headless: run Firefox without a window when true.
    """
    try:
        opts = Options()
        if headless:
            opts.set_headless()
        ffprofile = webdriver.FirefoxProfile(settings.firefox_profile_path)
        browser = webdriver.Firefox(firefox_profile=ffprofile, options=opts)
        try:
            browser.get('http://www.marvel.com/redeem')
            redeem_code(browser, codes)
        finally:
            # Close the browser even when redemption fails part-way.
            browser.quit()
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still propagate.
        logging.exception('Unable to redeem: %s', ','.join(codes))
def test_binary(self):
    """The binary defaults to None and accepts a FirefoxBinary or a path."""
    options = Options()
    assert options.binary is None

    replacement = FirefoxBinary()
    assert replacement != options.binary
    options.binary = replacement
    assert replacement == options.binary

    # Assigning a path string wraps it in a FirefoxBinary.
    binary_path = "/path/to/binary"
    options.binary = binary_path
    assert isinstance(options.binary, FirefoxBinary)
    assert options.binary._start_cmd == binary_path
def before_all(context):
    """Prepare the test context: target location and browser.

    Reads ``TARGET`` (file path or base URL) from the environment and exits
    with status 1 when it is missing.  Uses PhantomJS when ``BROWSER`` is
    ``phantomjs``, otherwise headless Firefox.
    """
    print("context", context)

    # Determine the target path. Can either be file path or base URL.
    target = os.environ.get('TARGET')
    if target is None:
        print("Please specify the Phenogrid file path or base URL with 'TARGET=' format")
        sys.exit(1)
    context.target = target

    # Check to see which browser to use, default to use Firefox
    if os.environ.get('BROWSER') == 'phantomjs':
        context.browser = webdriver.PhantomJS()
        print("# Using PhantomJS")
    else:
        headless = Options()
        headless.add_argument('-headless')
        context.browser = Firefox(firefox_options=headless)
        # A disabled alternative used to live here: Firefox started with
        # DesiredCapabilities (marionette, loggingPrefs set to ALL) and a
        # profile enabling the devtools console filters, using the
        # geckodriver at /usr/local/bin/geckodriver.

    # Set a 30 second implicit wait - http://selenium-python.readthedocs.org/en/latest/waits.html#implicit-waits
    # Once set, the implicit wait is set for the life of the WebDriver object instance.
    context.browser.set_window_size(1440, 900)
    context.browser.implicitly_wait(30)  # seconds
def reset_browser(self):
    """Give the test a clean browser session.

    Starts the shared ``globals.browser`` on first use; otherwise closes the
    current window and starts a new session on the existing driver.  Cookies
    are deleted afterwards in both cases.
    """
    if globals.browser is None:
        options = Options()
        options.add_argument(self.__get_arg())
        globals.browser = Firefox(AbstractBrowserBasedTest._firefox_profile,
                                  firefox_options=options,
                                  log_path=naming.GECKODRIVER_LOG_FILE_PATH)
        globals.browser.set_page_load_timeout(self.DEFAULT_TIMEOUT)
    else:
        globals.browser.close()
        globals.browser.start_session(
            capabilities=AbstractBrowserBasedTest._firefox_capabilities,
            browser_profile=AbstractBrowserBasedTest._firefox_profile)

    globals.browser.delete_all_cookies()  # Belt and Braces.
def setUp(self):
    """Save the four model fixtures, create the superuser, start the browser."""
    # Single-selection model: one empty row, one with a selection.
    self.single_empty = TestModelSingle()
    self.single_empty.save()
    self.single = TestModelSingle(selection='octopus')
    self.single.save()

    # Advanced model: one empty row, one with a set.
    self.advanced_empty = TestModelAdvanced()
    self.advanced_empty.save()
    self.advanced = TestModelAdvanced(set='set1')
    self.advanced.save()

    self.superuser = create_superuser()

    # Instantiating the WebDriver will load your browser
    browser_options = Options()
    if settings.HEADLESS_TESTING:
        browser_options.add_argument("--headless")
    self.webdriver = CustomWebDriver(firefox_options=browser_options)
def setup_package():
    """Set up the Selenium driver once for all tests."""
    # Just skipping *setup_package* and *teardown_package* generates an
    # uncaught exception under Python 2.6, hence the guard inside the body.
    if not tests_are_run:
        return

    if not SHOW_BROWSER:
        # Perform all graphical operations in memory.
        virtual_display = Xvfb(width=1280, height=720)
        SeleniumTestCase.vdisplay = virtual_display
        virtual_display.start()

    # Create a Selenium browser instance.
    headless = Options()
    headless.add_argument('-headless')
    browser = Firefox(firefox_options=headless)
    SeleniumTestCase.selenium = browser
    browser.maximize_window()
    SeleniumTestCase.wait = ui.WebDriverWait(browser, 10)
    SeleniumTestCase.selenium.implicitly_wait(3)
def _extract_event_id(page, previous_id=None):
    """Return the event id following ``previous_id`` in ``page``, or None.

    With ``previous_id=None`` the id after the first marker is returned.
    Otherwise the id after the next marker that follows the first occurrence
    of ``previous_id``'s marker is returned.
    """
    marker = 'hovercard/event.php?id='
    try:
        if previous_id is None:
            tail = page.split(marker)[1]
        else:
            tail = page.split(marker + previous_id)[1].split(marker)[1]
    except IndexError:
        return None
    # The id ends at the closing quote of the attribute.
    return tail.split('"')[0]


def EventsList(events_url):
    """Grab the first four event ids from the events page.

    :param events_url: URL of the events page, loaded in headless Firefox.
    :returns: tuple ``(latest, first, second, third)``; an id that cannot be
        extracted is the string ``"null"`` (and so are all ids after it,
        since each lookup starts from the previous id).
    """
    options = Options()
    options.set_headless(headless=True)
    browser = webdriver.Firefox(firefox_options=options)
    try:
        try:
            browser.get(events_url)
            page = browser.page_source
        finally:
            # Quit even when loading fails, so no Firefox process is leaked.
            browser.quit()
    except Exception:
        page = "null"

    found = []
    previous = None
    chain_broken = False
    for label in ("latest", "first", "second", "third"):
        event_id = None
        if not chain_broken:
            event_id = _extract_event_id(page, previous)
        if event_id is None:
            chain_broken = True
            logs.print2("Couldn't extract %s event id" % label)
            found.append("null")
        else:
            found.append(event_id)
            previous = event_id
    return tuple(found)
def create_browser(request, driver_wait_time, tries=0):
    """Start headless Firefox, retrying on failure.

    This sometimes fails to start firefox on CI, so we retry...

    :param request: pytest request; the driver is stored on
        ``request.node._driver``.
    :param driver_wait_time: implicit wait passed to the driver.
    :param tries: number of attempts already made; retries continue until
        this reaches 5.
    :returns: the started driver.
    :raises Exception: the last start-up error once retries are exhausted.
    """
    max_tries = 5
    options = Options()
    options.add_argument('-headless')

    while True:
        driver = None
        try:
            driver = webdriver.Firefox(firefox_options=options)
            driver.implicitly_wait(driver_wait_time)
            driver.set_window_size(1200, 1200)
            request.node._driver = driver
            return driver
        except Exception:
            if driver is not None:
                # Firefox started but configuring it failed: do not leave the
                # half-initialised browser running before the next attempt.
                try:
                    driver.quit()
                except Exception:
                    pass
            if tries >= max_tries:
                raise
            tries += 1
def __init__(self, url='http://www.fsf.org/', driver='C:\\Program Files\\Mozilla Firefox\\firefox.exe', **kwargs):
    """Start Firefox for ``url``.

    :param url: stored on ``self.url``.
    :param driver: path handed to ``FirefoxBinary``.
    :param kwargs: ``headless=<truthy>`` starts Firefox with ``--headless``.
    """
    self.url = url
    self.binarypath = FirefoxBinary(driver)
    self.opts = Options()
    self.timeout = False

    if kwargs.get('headless'):
        # Options has no ``add`` method; ``add_argument`` is the API that
        # appends a command-line argument.
        self.opts.add_argument('--headless')

    self.driver = webdriver.Firefox(firefox_binary=self.binarypath,
                                    firefox_options=self.opts)
def browserEngine(response):
    """Render ``response`` in headless Firefox and report whether an alert popped up.

    External script sources and ``href`` targets are neutralised first, the
    markup is written to ``test.html`` and loaded, and a small mouse move is
    performed.

    :param response: HTML text to render.
    :returns: True when an ``UnexpectedAlertPresentException`` was raised,
        otherwise False.
    """
    options = Options()
    options.add_argument('--headless')
    browser = webdriver.Firefox(options=options)
    try:
        # ``flags=`` must be a keyword: the fourth positional parameter of
        # re.sub is ``count``, so passing re.I there limited the number of
        # replacements and never enabled case-insensitive matching.
        response = re.sub(r'<script.*?src=.*?>', '<script src=#>', response, flags=re.I)
        response = re.sub(r'href=.*?>', 'href=#>', response, flags=re.I)
        writer(response, 'test.html')
        browser.get('file://' + sys.path[0] + '/test.html')
        os.remove('test.html')

        popUp = False
        actions = webdriver.ActionChains(browser)
        try:
            actions.move_by_offset(2, 2)
            actions.perform()
            browser.close()
        except UnexpectedAlertPresentException:
            popUp = True
        return popUp
    finally:
        # Always end the session, including when loading the page fails.
        browser.quit()
def create_instances_of_webdriver(selenium, driver, browser_id_list, tmpdir,
                                  tmp_memory, driver_kwargs, driver_type,
                                  firefox_logging, firefox_path, xvfb,
                                  screen_width, screen_height, displays):
    """Start one browser per id in ``browser_id_list``.

    Each id is paired with a display from ``xvfb`` (cycled), gets an empty
    entry in ``tmp_memory`` and a started, configured browser registered in
    ``selenium`` and ``displays``.

    :raises AttributeError: when a browser id is already present in
        ``selenium``.
    """
    kind = driver_type.lower()

    for browser_id, display in zip(parse_seq(browser_id_list), cycle(xvfb)):
        if browser_id in selenium:
            raise AttributeError('{:s} already in use'.format(browser_id))

        tmp_memory[browser_id] = {'shares': {},
                                  'spaces': {},
                                  'groups': {},
                                  'mailbox': {},
                                  'oz': {},
                                  'window': {'modal': None}}

        with redirect_display(display):
            temp_dir = str(tmpdir)
            download_dir = os.path.join(temp_dir, browser_id, 'download')

            if kind == 'chrome':
                chrome_options = driver_kwargs['desired_capabilities']['chromeOptions']
                chrome_options['prefs'].update(
                    {"download.default_directory": download_dir})
            elif kind == 'firefox':
                ff_options = Options()
                ff_profile = FirefoxProfile()
                log_path = _set_firefox_profile(ff_profile, browser_id,
                                                temp_dir, firefox_logging)
                ff_options.profile = ff_profile
                if firefox_path is not None:
                    ff_options.binary = FirefoxBinary(firefox_path)
                driver_kwargs['firefox_options'] = ff_options

            browser = driver(driver_kwargs)
            if kind == 'firefox' and firefox_logging:
                browser.get_log = _firefox_logger(log_path)
            _config_driver(browser, screen_width, screen_height)

        displays[browser_id] = display
        selenium[browser_id] = browser
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException from selenium.webdriver.common.action_chains import ActionChains from pymongo import MongoClient import time import pprint from pymongo.errors import BulkWriteError import signal # ------- Initialise database -------- mdb = MongoClient('192.168.2.212', 27017) reviews_db = mdb.amazon_brand_data.reviews_test_test # ------------------------------------ browser = None options = Options() options.add_argument("--headless") def exit_all(signal=None, frame=None): global browser print("Exiting by closing browser") if browser: browser.quit() sys.exit(0) exit = exit_all def get_brand_from_asin(asin): url = f"https://www.amazon.in/dp/{asin}" try: browser.get(url)
def driver_kwargs(driver_kwargs):
    """Add a fresh ``Options`` object under the ``options`` key and return the mapping."""
    driver_kwargs.update(options=Options())
    return driver_kwargs
from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.firefox.options import Options from time import sleep domainName = "splus-wip3.pnistaging.com" applicationPath = "/services/printing" np.random.seed(0) productKeys = np.array([ "880e9a0b0a627f97", "0334a43adccf687a", "550f7cadad98fe98", "660e1c58e314b38e", "0a0e1568742a5a2e", "fadb30ec37bdebc8" ]) if __name__ == "__main__": startUrl = f"https://{domainName}{applicationPath}/legacy/station/6B696F736b5F72696B/126/redirect/" driverOptions = Options() driverOptions.headless = False driver = webdriver.Firefox(options=driverOptions) try: driver.maximize_window() driver.get(startUrl) for _ in range(10): startUrl = f"https://{domainName}{applicationPath}/" driver.get(startUrl) sleep(2) productKey = np.random.choice(a=productKeys) startUrl = f"https://{domainName}{applicationPath}/product/{productKey}/builder/" driver.get(startUrl) sleep(2) builderUrl = driver.current_url nextUrl = builderUrl.replace("Builder", "review")
token = json.loads(endpoint) response = send_web_push_v2(token, message) print('response') print(response) print('success') except Exception as e: print("error: ", e) print('failed: ', str(e)) print("PLEASE WAIT STARTING wa_monitor") cwd = os.getcwd() notify = Notify() options = FirefoxOptions() fp = webdriver.FirefoxProfile(cwd + "/profile") # useragent = "Mozilla/5.0 (X11; Linux i686; rv:77.0) Gecko/20100101 Firefox/77.0" # #works perfectly fine without useragent modification in windows awa heroku # fp.set_preference("general.useragent.override", useragent) # switch headless option accordingly # options.add_argument('--no-sandbox') # options.add_argument("--headless") # ///////////////// Init binary & driver new_driver_path = cwd + '/geckodriver/geckodriver' new_binary_path = '/usr/bin/firefox' options.binary_location = new_binary_path
def get_firefox_options():
    """Return headless Firefox options with a 2048x1080 window.

    Firefox does not understand the Chrome-style ``window-size=WxH``
    argument (without a leading dash it is not even parsed as an option);
    the window size is requested with ``--width`` and ``--height`` instead.
    """
    options = Options()
    options.headless = True
    options.add_argument('--width=2048')
    options.add_argument('--height=1080')
    return options
def browser(name=None, driver_path=None, grid_url=None):
    """
    Create a WebDriver for the requested browser.

    When ``name`` is omitted Chrome is started.  Accepted names: "firefox" /
    "ff", "chrome" / "google chrome" / "gc", "internet explorer" / "ie" /
    "IE", "opera", "chrome_headless", "firefox_headless", "edge", "safari",
    or a device name from ``PHONE_LIST`` / ``PAD_LIST`` (Chrome mobile
    emulation).

    :param name: Browser name
    :param driver_path: Browser driver path
    :param grid_url: Either a string representing URL of the remote server or
        a custom remote_connection.RemoteConnection object. Only used for
        Firefox and Chrome, and only when ``driver_path`` is not given.
    :return: the started driver
    :raises NameError: when ``name`` is not a known browser
    """
    if name is None:
        name = "chrome"

    if name in ["firefox", "ff"]:
        if driver_path is not None:
            return webdriver.Firefox(executable_path=driver_path)
        if grid_url is not None:
            return webdriver.Remote(
                command_executor=grid_url,
                desired_capabilities=DesiredCapabilities.FIREFOX.copy())
        return webdriver.Firefox()

    elif name in ["chrome", "google chrome", "gc"]:
        if driver_path is not None:
            return webdriver.Chrome(executable_path=driver_path)
        if grid_url is not None:
            return webdriver.Remote(
                command_executor=grid_url,
                desired_capabilities=DesiredCapabilities.CHROME.copy())
        return webdriver.Chrome()

    # Membership test: comparing the string to the list with ``==`` was
    # always False, which made Internet Explorer unreachable.
    elif name in ["internet explorer", "ie", "IE"]:
        return webdriver.Ie()

    elif name == "opera":
        return webdriver.Opera()

    elif name == "chrome_headless":
        chrome_options = CH_Options()
        chrome_options.add_argument('--headless')
        if driver_path is not None:
            return webdriver.Chrome(chrome_options=chrome_options,
                                    executable_path=driver_path)
        return webdriver.Chrome(chrome_options=chrome_options)

    elif name == "firefox_headless":
        firefox_options = FF_Options()
        firefox_options.headless = True
        if driver_path is not None:
            return webdriver.Firefox(firefox_options=firefox_options,
                                     executable_path=driver_path)
        return webdriver.Firefox(firefox_options=firefox_options)

    elif name == 'edge':
        return webdriver.Edge()

    elif name == 'safari':
        return webdriver.Safari()

    elif name in PHONE_LIST or name in PAD_LIST:
        options = CH_Options()
        options.add_experimental_option("mobileEmulation", {"deviceName": name})
        chrome_kwargs = {"chrome_options": options}
        if driver_path is not None:
            # Only override the driver location when one was supplied, so the
            # library default is used otherwise instead of ``None``.
            chrome_kwargs["executable_path"] = driver_path
        driver = webdriver.Chrome(**chrome_kwargs)
        # Phones get a narrow window, pads a wide one.
        width = 480 if name in PHONE_LIST else 1100
        driver.set_window_size(width=width, height=900)
        return driver

    else:
        raise NameError(
            "Not found '{}' browser, See the help doc: https://github.com/SeldomQA/seldom/blob/master/docs/driver.md'."
            .format(name))
def fill_EpiCoV_upload(uname, upass, seq, metadata, to, rt, iv, headless):
    """Fill in the single-upload form of EpiCoV (GISAID) with a sequence and metadata.

    Logs in, opens the EpiCoV upload page, fills the text inputs and
    textareas from ``metadata`` and then either waits for the user to review
    and close the browser (``headless`` false) or clicks "Submit for Review"
    and prints the displayed form messages (``headless`` true).

    :param uname: GISAID login name.
    :param upass: GISAID password.
    :param seq: sequence text; stored in ``metadata["sequence"]`` (the
        caller's object is modified).
    :param metadata: mapping of field name to value.
    :param to: timeout passed to ``WebDriverWait``.
    :param rt: not used by this function.
    :param iv: not used by this function.
    :param headless: run Firefox without a window and submit automatically.
    """
    # add sequence to metadata
    metadata["sequence"] = seq

    # MIME types
    mime_types = "application/octet-stream"
    mime_types += ",application/excel,application/vnd.ms-excel"
    mime_types += ",application/pdf,application/x-pdf"

    print("Opening browser...")
    profile = webdriver.FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", mime_types)
    profile.set_preference("plugin.disable_full_page_plugin_for_types", mime_types)
    profile.set_preference("pdfjs.disabled", True)

    options = Options()
    if headless:
        options.add_argument("--headless")

    driver = webdriver.Firefox(firefox_profile=profile, options=options)
    try:
        # driverwait
        driver.implicitly_wait(20)
        wait = WebDriverWait(driver, to)

        # open GISAID
        print("Opening website GISAID...")
        driver.get('https://platform.gisaid.org/epi3/frontend')
        waiting_sys_timer(wait)
        print(driver.title)
        assert 'GISAID' in driver.title

        # login
        print("Logining to GISAID...")
        username = driver.find_element_by_name('login')
        username.send_keys(uname)
        password = driver.find_element_by_name('password')
        password.send_keys(upass)
        driver.execute_script("return doLogin();")
        waiting_sys_timer(wait)

        # navigate to EpiCoV
        print("Navigating to EpiCoV...")
        epicov_tab = driver.find_element_by_xpath("//div[@id='main_nav']//li[3]/a")
        epicov_tab.click()
        waiting_sys_timer(wait)

        # access uploading page
        print("Accessing uploading page...")
        upload_tab = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, 'div.sys-actionbar-action:nth-child(4)')))
        upload_tab.click()
        waiting_sys_timer(wait)

        # WARNING: different users might have different uploading options.
        # Handling the popup is best effort: when it is absent or behaves
        # differently the form is filled in directly.
        try:
            iframe = driver.find_element_by_xpath("//iframe")
            if iframe.is_displayed() and iframe.get_attribute('id').startswith(
                    'sysoverlay'):
                print("Popup window detected...")
                driver.switch_to.frame(iframe)
                button = wait.until(
                    EC.presence_of_element_located((By.XPATH, "//td[1]")))
                print("Choosing single upload option...")
                button.click()
                driver.switch_to.default_content()
                waiting_sys_timer(wait)
        except Exception:
            pass

        # keyword mapping: position of the form field -> metadata key
        # (empty string = field is left untouched)
        entry_keys_mapping = {
            # text
            0: "virus_name",  # Virus name*: hCoV-19/Country/Identifier/2020
            1: "virus_passage",  # Passage details/history*: Example: Original, Vero
            2: "collection_date",  # Collection date* Example: 2020-04-01
            3: "location",  # location*: Continent / Country / Region
            4: "",  # Additional location information: Example: Cave, Live animal market
            5: "host",  # Host*
            6: "",  # Additional host information: Example: Cruise Ship, Convention, Live animal market
            7: "gender",  # Gender*
            8: "age",  # Patient age*
            9: "status",  # Patient status: Example: Hospitalized, Released, Live, Deceased, unknown
            10: "isolation_source",  # Specimen source: Example: Nasal
            11: "",  # Outbreak Detail: Example: Date, Place, Family cluster
            12: "",  # Last vaccinated
            13: "",  # Treatment: Example: Include drug name, dosage
            14: "sequencing_technology",  # Sequencing technology: Nanopore MinION
            15: "assembly_method",  # Assembly method
            16: "coverage",  # Coverage
            17: "",  # Sample ID given by the provider
            18: "",  # Sample ID given by the Submitting lab
            # textarea
            19: "originating_lab",  # Originating lab*
            20: "originating_address",  # Originating lab address*
            21: "submitting_lab",  # Submitting lab*: Los Alamos National Lab
            22: "submitting_address",  # Submitting lab address*
            23: "authors",  # Authors*
            24: "",  # Submitter information: address
            25: "sequence"  # custom
        }

        # fill the webform
        text_inputs = driver.find_elements_by_xpath("//input[@type='text']")
        textareas = driver.find_elements_by_xpath("//textarea")
        num = 0
        for inputs in text_inputs, textareas:
            for text_input in inputs:
                # Fields beyond the known mapping are skipped instead of
                # raising KeyError.
                meta_key = entry_keys_mapping.get(num, "")
                if meta_key and meta_key in metadata:
                    text_input.send_keys(metadata[meta_key])
                num += 1
        waiting_sys_timer(wait)

        if not headless:
            # wait until the user to close browser
            print("Please review the form and submit for review...")
            while True:
                try:
                    _ = driver.window_handles
                except Exception:
                    # Querying the handles fails once the window is gone.
                    print("Browser closed by user.")
                    break
                time.sleep(1)
        else:
            button = driver.find_element_by_xpath(
                '//button[contains(text(), "Submit for Review")]')
            button.click()
            waiting_sys_timer(wait)

            warnings = driver.find_elements_by_xpath(
                "//div[@class='sys-form-fi-message']")
            for msg in warnings:
                if msg.is_displayed():
                    print(msg.text)
    finally:
        # close driver, also when any step above fails
        driver.quit()
class ISIS():
    """Fetch course material from the TU Berlin ISIS site.

    A headless Firefox session logs in and walks the course resource pages;
    its cookies are copied into a ``requests`` session which performs the
    actual file downloads.  Constructing an instance runs the whole job
    immediately (``__init__`` calls ``main``).
    """

    # Query suffix found on forced-download links; it must not end up in the
    # name of the file written to disk.
    _FORCE_DOWNLOAD_SUFFIX = '?forcedownload=1'

    def __init__(self, usr, pw, dldir, courseIDs, **kwargs):
        """Configure Firefox for unattended downloads and start the job.

        Args:
            usr: login name typed into the ``username`` field.
            pw: password typed into the ``password`` field.
            dldir: directory Firefox is told to save its own downloads to.
            courseIDs: mapping of course name -> course id (iterated with
                ``.items()`` in ``dataFetcher``).
            **kwargs: accepted for call compatibility; not used here.
        """
        self.is_login = usr
        self.is_pw = pw
        self.dldir = dldir
        self.ids = courseIDs
        self.courseLink = 'https://isis.tu-berlin.de/course/resources.php?id='
        self.options = Options()
        self.options.headless = True
        self.options.set_preference('browser.download.folderList', 2)
        self.options.set_preference(
            'browser.download.manager.showWhenStarting', False)
        self.options.set_preference('browser.download.dir', self.dldir)
        self.options.set_preference('browser.helperApps.neverAsk.saveToDisk',
                                    'application/msword, application/csv, application/ris, text/csv, image/png, image/jpg, image/jpeg, application/pdf, text/html, text/plain, application/zip, application/x-zip, application/x-zip-compressed, application/download, application/octet-stream')
        self.driver = webdriver.Firefox(options=self.options)
        self.wait = WebDriverWait(self.driver, 10)
        self.request = requests.Session()
        self.main()

    def main(self):
        """Log in, download everything, and always shut the browser down."""
        try:
            self.login()
            self.dataFetcher()
            print(f'Finished fetching')
        finally:
            # Quit even when login/fetching raised, so no headless Firefox
            # process is left running.
            self.driver.quit()

    def waiter(self, div):
        """Block until the element matching CSS selector ``div`` is visible.

        An empty selector falls back to ``'div.tub-logo'``.
        """
        if div == '':
            div = 'div.tub-logo'
        self.wait.until(EC.visibility_of_element_located(
            (By.CSS_SELECTOR, div)))

    def login(self):
        """Log in through the browser and copy its cookies to ``requests``."""
        print(f'Login in to Isis')
        self.driver.get('https://www.isis.tu-berlin.de/login/index.php')
        self.waiter('div.container')
        self.driver.find_element_by_id('shibboleth_login').click()
        self.waiter('div.contentContainer')
        self.driver.find_element_by_id('username').send_keys(self.is_login)
        self.driver.find_element_by_id('password').send_keys(self.is_pw)
        self.driver.find_element_by_id('login-button').click()
        # get cookies and forward them to the requests session
        cookiejar = self.driver.get_cookies()
        for cookie in cookiejar:
            self.request.cookies.set(cookie['name'], cookie['value'])
        print(f'Done')

    @staticmethod
    def _target_file(path, current_url, name, folder):
        """Return ``(file_name, file_path)`` for a download.

        Args:
            path: destination directory, including the trailing separator.
            current_url: resolved download URL; its last ``/`` segment is
                the file name for regular files.
            name: link text, used (minus its first character, with spaces
                turned into underscores) to name folder archives.
            folder: ``1`` for a folder archive, anything else for a file.
        """
        file_name = current_url.split('/')[-1]
        if folder == 1:
            file_name = name[1:].replace(' ', '_') + '.zip'
        # unquote url to change %C3% to umlaut
        file_path = path + unquote(file_name)
        suffix = ISIS._FORCE_DOWNLOAD_SUFFIX
        if file_path.endswith(suffix):
            file_path = file_path[:-len(suffix)]
            file_name = file_name[:-len(suffix)]
        return file_name, file_path

    def downloader(self, path, url, name, folder):
        """Download one resource into ``path`` unless it already exists.

        Args:
            path: destination directory, including the trailing separator.
            url: link taken from the course resource table.
            name: link text of the resource.
            folder: ``1`` when ``url`` is a folder archive, ``0`` otherwise.
        """
        # redirect url to download url
        r = self.request.head(url, allow_redirects=True)
        current_url = r.url
        # No redirect for a regular file: the link leads to a viewer page, so
        # look for the embedded image or the workaround link on that page.
        if current_url == url and folder == 0:
            self.driver.get(url)
            try:
                elem = self.driver.find_element_by_css_selector(
                    '.resourceimage')
                current_url = elem.get_attribute('src')
            except NoSuchElementException:
                print(f'not an image')
                try:
                    elem = self.driver.find_element_by_css_selector(
                        '.resourceworkaround > a:nth-child(1)')
                    url = elem.get_attribute('href')
                    r = self.request.head(url, allow_redirects=True)
                    current_url = r.url
                except NoSuchElementException:
                    print(f'no workaround pdf either -> skipping file')
                    # Actually skip: otherwise the viewer page itself would
                    # be saved under the page's URL name.
                    return

        file_name, file_path = self._target_file(
            path, current_url, name, folder)

        # check if file exists, if not then download
        if os.path.exists(file_path):
            print(f'{file_name} already exists')
            return
        print(f'Beginning file download {file_name}')
        t_start = time.time()
        data = self.request.get(current_url)
        with open(file_path, 'wb') as f:
            f.write(data.content)
        t_total = time.time() - t_start
        print(f'finished in {t_total} seconds\n')

    def dataFetcher(self):
        """Visit every course's resource page and download what it links to.

        Relies on the module-level names ``system`` and ``ISIS_dir`` for the
        destination directory.
        """
        for c, id_ in self.ids.items():
            print(f'Course: {c}, id: {id_}')
            # go to resource page
            self.driver.get(self.courseLink + id_)
            # prepare download path
            if system == 'Windows':
                path = ISIS_dir + '\\' + c + '\\'
            else:
                path = ISIS_dir + '/' + c + '/'
            # open(..., 'wb') in downloader fails if the directory is missing
            os.makedirs(path, exist_ok=True)
            # find elements by class td,cell,c1 must include href find links to
            # file (not actually the file link)
            elems = self.driver.find_elements_by_css_selector(
                'td.cell.c1 [href]')
            # keyed by url so a link listed twice is downloaded once
            url_dict = dict()
            for elem in elems:
                url = elem.get_attribute('href')
                name = elem.get_attribute('text')
                url_dict[url] = name
            for url, name in url_dict.items():
                # download regular files
                if 'resource' in url:
                    self.downloader(path, url, name, 0)
                # download folder as .zip
                if 'folder' in url:
                    print(f'Folder')
                    url_id = url.split('?')[-1]
                    f_url = 'https://isis.tu-berlin.de/mod/folder/download_folder.php?' + url_id
                    self.downloader(path, f_url, name, 1)
def sel_init(executable_path='/home/shelbyt/geckodriver', headless=False):
    """Start a Firefox WebDriver session and return it.

    Args:
        executable_path: location of the geckodriver binary.  Defaults to
            the path this function previously had hard-coded.
        headless: when true, start Firefox with ``--headless``.  Defaults to
            a visible browser, as before.

    Returns:
        The new ``webdriver.Firefox`` instance; the caller is responsible
        for calling ``quit()`` on it.
    """
    options = Options()
    if headless:
        options.add_argument("--headless")
    browser = webdriver.Firefox(firefox_options=options,
                                executable_path=executable_path)
    return browser
def __init__(
    self,
    cookies_folder_path: Optional[str] = None,
    extensions_folder_path: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    cookies_id: Optional[str] = None,
    private: bool = False,
    screen_size: Optional[Tuple[int, int]] = None,  # (width, height)
    full_screen: bool = True,
    headless: bool = False,
    language: str = 'en-us',
    manual_set_timezone: bool = False,
    user_agent: Optional[str] = None,
    load_proxy_checker_website: bool = False,
    disable_images: bool = False
):
    '''Build a Firefox profile from the arguments and start the driver.

    EITHER PROVIDE 'cookies_id' OR 'cookies_folder_path'.
    If 'cookies_folder_path' is None, 'cookies_id' is used to derive it
    (a sub-folder of the 'cookies' directory next to this file).
    If 'cookies_id' is None as well, it becomes 'test'.

    Other arguments:
        extensions_folder_path: directory walked for '.xpi'/'.zip' add-ons
            to install after start-up.
        host, port: proxy to use for http/ssl/ftp/socks; applied only when
            both are given.
        private: start in private-browsing mode.
        screen_size: (width, height) passed as window-size arguments.
        full_screen: switch the window to full screen after start-up.
        headless: start Firefox with '--headless'.
        language: value for 'intl.accept_languages'.
        manual_set_timezone: if an add-on whose file name contains
            'change_timezone' was installed, open helper tabs and wait on
            stdin for the user to set the timezone by hand.
        user_agent: user-agent override; RANDOM_USERAGENT picks one once
            and persists it in 'user_agent.txt' in the cookies folder.
        load_proxy_checker_website: open an IP-checker page when a proxy is
            configured (only when the manual timezone step is not taken).
        disable_images: block image loading and the Flash plugin.
    '''

    def close_extra_windows():
        # Close every window but the first, then refocus the first so the
        # driver is never left pointing at a closed window.
        while len(self.driver.window_handles) > 1:
            time.sleep(0.5)
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.driver.close()
        self.driver.switch_to.window(self.driver.window_handles[0])

    if cookies_folder_path is None:
        cookies_id = cookies_id or 'test'

        current_folder_path = os.path.dirname(os.path.abspath(__file__))
        general_cookies_folder_path = os.path.join(current_folder_path, 'cookies')
        os.makedirs(general_cookies_folder_path, exist_ok=True)

        cookies_folder_path = os.path.join(general_cookies_folder_path, cookies_id)

    self.cookies_folder_path = cookies_folder_path
    os.makedirs(self.cookies_folder_path, exist_ok=True)

    profile = webdriver.FirefoxProfile()

    if user_agent is not None:
        if user_agent == RANDOM_USERAGENT:
            # Reuse the user agent stored for this cookies folder so the
            # same identity is presented on every run.
            user_agent_path = os.path.join(cookies_folder_path, 'user_agent.txt')

            if os.path.exists(user_agent_path):
                with open(user_agent_path, 'r') as file:
                    user_agent = file.read().strip()
            else:
                user_agent = self.__random_firefox_user_agent(min_version=60.0)

                with open(user_agent_path, 'w') as file:
                    file.write(user_agent)

        profile.set_preference("general.useragent.override", user_agent)

    if language is not None:
        profile.set_preference('intl.accept_languages', language)

    if private:
        profile.set_preference("browser.privatebrowsing.autostart", True)

    if disable_images:
        profile.set_preference('permissions.default.image', 2)
        profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)

    if host is not None and port is not None:
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", host)
        profile.set_preference("network.proxy.http_port", port)
        profile.set_preference("network.proxy.ssl", host)
        profile.set_preference("network.proxy.ssl_port", port)
        profile.set_preference("network.proxy.ftp", host)
        profile.set_preference("network.proxy.ftp_port", port)
        profile.set_preference("network.proxy.socks", host)
        profile.set_preference("network.proxy.socks_port", port)
        profile.set_preference("network.proxy.socks_version", 5)
        profile.set_preference("signon.autologin.proxy", True)

    # NOTE(review): "marionatte" looks like a misspelling of "marionette";
    # as written the preference name is presumably unknown to Firefox.  It is
    # left unchanged on purpose -- confirm the intent before "fixing" it.
    profile.set_preference("marionatte", False)
    profile.set_preference("dom.webdriver.enabled", False)
    profile.set_preference("media.peerconnection.enabled", False)
    profile.set_preference('useAutomationExtension', False)

    profile.set_preference("general.warnOnAboutConfig", False)
    profile.update_preferences()

    options = FirefoxOptions()

    if headless:
        options.add_argument("--headless")

    if screen_size is not None:
        options.add_argument("--width=" + str(screen_size[0]))
        options.add_argument("--height=" + str(screen_size[1]))

    self.driver = webdriver.Firefox(firefox_profile=profile, firefox_options=options)

    if full_screen:
        self.driver.fullscreen_window()

    if extensions_folder_path is not None:
        try:
            change_timezone_id = None

            for (dirpath, _, filenames) in os.walk(extensions_folder_path):
                for filename in filenames:
                    if filename.endswith('.xpi') or filename.endswith('.zip'):
                        addon_id = self.driver.install_addon(os.path.join(dirpath, filename), temporary=False)

                        if 'change_timezone' in filename:
                            change_timezone_id = addon_id

            # self.driver.get("about:addons")
            # self.driver.find_element_by_id("category-extension").click()

            # self.driver.execute_script("""
            #     let hb = document.getElementById("html-view-browser");
            #     let al = hb.contentWindow.window.document.getElementsByTagName("addon-list")[0];
            #     let cards = al.getElementsByTagName("addon-card");
            #     for(let card of cards){
            #         card.addon.disable();
            #         card.addon.enable();
            #     }
            # """)

            # Windows opened while the add-ons were being installed.
            close_extra_windows()

            if change_timezone_id is not None and manual_set_timezone:
                if host is not None and port is not None:
                    self.open_new_tab('https://whatismyipaddress.com/')
                    time.sleep(0.25)

                self.open_new_tab('https://www.google.com/search?client=firefox-b-d&q=my+timezone')
                time.sleep(0.25)

                self.driver.switch_to.window(self.driver.window_handles[0])

                input('\n\n\nSet timezone.\n\nPress ENTER, when finished. ')

                close_extra_windows()
            elif load_proxy_checker_website and host is not None and port is not None:
                self.driver.get('https://whatismyipaddress.com/')
        except Exception as exc:
            # Best effort: add-on setup failing must not abort construction,
            # but Ctrl-C during the prompt above is no longer swallowed.
            print('Extension setup failed: ' + str(exc))
            close_extra_windows()
import os
import random
import time
import sys

# Make the parent directory importable before pulling in project modules.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from selenium import webdriver
from selenium.webdriver import Firefox, DesiredCapabilities, ActionChains
from selenium.webdriver.firefox.options import Options
from scraping.scrapper import Connector

firefox_path = "/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/geckodriver"

# Launch Browser in private mode. No cookies
o = Options()
o.add_argument('-private')
driver = Firefox(firefox_options=o, executable_path=firefox_path)

# Add-ons are installed in this exact order.
for addon_xpi in (
    '/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/canvas_defender-1.1.0-fx.xpi',
    '/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/disable_webrtc-1.0.23-an+fx.xpi',
    '/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/random_user_agent-2.2.12-an+fx.xpi',
    '/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/canvasblocker-1.2-an+fx.xpi',
    '/Users/mathewzaharopoulos/dev/realestate_api/src/scraping/utilities/spoof_timezone-0.2.3-an+fx.xpi',
):
    driver.install_addon(addon_xpi)

# Rest for extension settings
driver.get('https://google.ca')

# SETTINGS FOR PRIVATE MODE
# Fixed 40 second pause after the first page load.
time.sleep(40)
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
import os
import time
import sys

# Credentials are read from the environment instead of being committed to
# the source file.
fb_login = os.environ.get('FB_LOGIN')
fb_password = os.environ.get('FB_PASSWORD')
if not fb_login or not fb_password:
    sys.exit('Set the FB_LOGIN and FB_PASSWORD environment variables before running.')

firefox_options = Options()
# NOTE(review): these three look like Chrome command-line switches; it is
# not clear Firefox honours them -- confirm before relying on them.
firefox_options.add_argument('--dns-prefetch-disable')
firefox_options.add_argument('--no-sandbox')
firefox_options.add_argument('--lang=en-US')
browser = webdriver.Firefox(executable_path=r'D:\geckodriver', firefox_options=firefox_options)
browser.get('https://www.facebook.com/')
time.sleep(5)

# convert to english: click the first link whose text mentions English (US)
lan = browser.find_elements_by_tag_name('a')
for x in lan:
    print(x.text)
    if 'English (US)' in x.text.strip():
        x.click()
        break
time.sleep(2)

# log in: fill the e-mail/phone and password fields
signup_elem = browser.find_element_by_id('email')
signup_elem.send_keys(fb_login)
login_elem = browser.find_element_by_id('pass')
login_elem.send_keys(fb_password)
ins = browser.find_elements_by_tag_name('input')
EC.element_to_be_clickable( (By.XPATH, "//div[contains(@class, '_35DP2XbY0vnDR6ntQlSXMJ')]"))) items = driver.find_elements_by_xpath( "//pdo-product-item[contains(@class, 'P9eg53AkHYfXRP7gt5njS')]" ) except Exception as e: print('Erro categoria: ' + categoria[1] + ' - linha 164') return quantidadeProdutos try: inicial = datetime.now() print('Tarefa iniciou - ' + inicial.strftime('%Y-%m-%d %H:%M:%S')) option = Options() option.headless = True option.binary_location = r'C:\Program Files\Mozilla Firefox\firefox.exe' driver = webdriver.Firefox(options=option) print('Iniciar verificação de Categorias') categorias = carregarCategorias() print('Iniciar carregamento de produtos') for categoria in categorias: pagina = 0 print('A correr categoria ' + categoria[1] + ', página: ' + str(pagina + 1)) i = navPage(categoria, pagina) while i > 0: pagina += 1
from bluera_ctec_scraper import scrape_loaded_ctec_page
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import json

# Sample report pages; each assignment overrides the previous one, so only
# the last URL is actually scraped.  The final query parameter is "regl"
# (it had been mangled into the "registered" sign plus "l").
url = "https://northwestern.bluera.com/northwestern/rpv.aspx?lang=eng&redi=1&SelectedIDforPrint=9069f81e1fff8fccf3c222f29993048a5915b3189a57f260d07d1cec125f2b7dab8092ea6e4d5c72d1846903549a865b&ReportType=2&regl=en-US"
url = "https://northwestern.bluera.com/northwestern/rpv-eng.aspx?lang=eng&redi=1&SelectedIDforPrint=807ab7bd79d0e7aae1c8b01cdb1455f447af4f54f02d66461ff39ae900d0f598f570086e0d778da384e5bb246236f88d&ReportType=2&regl=en-US"
url = "https://northwestern.bluera.com/northwestern/rpv-eng.aspx?lang=eng&redi=1&SelectedIDforPrint=b78cc593e617e0f91b654f870f6df37ffcb8ef1e490aed4abce4511b61fbd81236e6fb1393ff043630bc3c90db32a08f&ReportType=2&regl=en-US"

options = Options()
# Same argument set_headless(True) used to append, without depending on
# that since-removed method.
options.add_argument('-headless')

firefox_profile = webdriver.FirefoxProfile()
# 2 = block images
firefox_profile.set_preference('permissions.default.image', 2)
# Boolean preference: must be False, not the string 'false'.
firefox_profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)

driver = webdriver.Firefox(options=options, firefox_profile=firefox_profile, executable_path="/usr/local/bin/geckodriver")
try:
    driver.set_window_size(1000, 1000)
    driver.get(url)

    scrape = scrape_loaded_ctec_page(driver)
    # Round-trip through JSON so the printed structure contains only
    # JSON-representable values.
    parsed = json.loads(json.dumps(scrape))
    print(json.dumps(parsed, indent=4, sort_keys=True))
finally:
    # Always release the browser, even when loading or scraping fails.
    driver.quit()
sys.exit('Error when reading sdo.auth file!!!') def mymes(mes, d, plus_mark=True): k = 80 - len(mes) - 7 print(mes + '...', end='') for i in range(k): print('.', end='') sleep(d / k) if plus_mark: print('.[+]') else: print('....') opts = Options() opts.add_argument('--headless') opts.add_argument('--ignore-certificate-errors') mymes('Driver is starting now', 0, False) mymes("Please wait, don't close windows!", 0, False) # Download driver on https://github.com/mozilla/geckodriver/releases driver = webdriver.Firefox(options=opts, executable_path='../rgsu.config/geckodriver.exe') # Download Chrome driver if you use Google Chrome # https://sites.google.com/a/chromium.org/chromedriver/home # driver = webdriver.Chrome(chrome_options=opts, executable_path=r'chromedriver.exe') wait = WebDriverWait(driver, 20)
class ProcessElement:
    """Visit one page in an instrumented Firefox and study autofilled fields.

    The page is loaded, overlays are hidden, text-like inputs are clicked so
    the browser writes autofill information to its trace log, and that log
    is then parsed to decide which autofilled fields are visible.

    Relies on module-level names defined elsewhere in the file: ``timestr``,
    ``ind``, ``hidden``, ``tqdm``, ``Keys``, and the output file objects
    ``firefox_autofill``, ``firefox_invisible``, ``firefox_all_visibility``.
    """

    # Patterns that pull individual values out of one trace-log line.
    _FIELD_PATTERNS = {
        'type_count': re.compile(r'(?<=typecount--" ).*?(?= ")'),
        'fill_type': re.compile(r'(?<=filltype--" ").*?(?=")'),
        'id': re.compile(r'(?<=id--" ").*?(?=")'),
        'name': re.compile(r'(?<=name--" ").*?(?=")'),
        'class': re.compile(r'(?<=class--" ").*?(?=")'),
        'autocomplete': re.compile(r'(?<=autocomplete--" ").*?(?=")'),
        'value': re.compile(r'(?<=value--" ").*?(?=")'),
        'tag': re.compile(r'(?<=tag--" ").*?(?=")'),
        'hidden': re.compile(r'(?<=hidden--" ).*?(?= )'),
        'options_num': re.compile(r'(?<=options-num--" )\d+'),
        'fill_val': re.compile(r'(?<=previewvalue--" ").*?(?=")'),
    }
    _AUTOFILL_NUM_PATTERN = re.compile(r'(?<="autofill-num--" )\d+')
    _COMMENT_PATTERN = re.compile(r'(?<=<!--).*?(?=-->)')

    def __init__(self, url):
        """Prepare state for ``url`` and try to start Firefox.

        If the browser cannot be started the exception is printed and
        ``self.driver`` stays ``None``.
        """
        self.non_type = ['checkbox', 'button', 'radio', 'submit', 'file', 'image', 'search', 'hidden']
        # input types that are clicked to trigger autofill
        self.types = ["text", "email", "tel", "number", "month"]
        self.url = url
        self.invisible = []
        # all regex-matched input/select fields
        self.fields_all = []
        # auto-fillable input/select fields
        self.fields_filled = []
        self.clickable = []
        self.driver = None

        # initial driver
        # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict.
        caps = DesiredCapabilities.FIREFOX.copy()
        caps["pageLoadStrategy"] = "normal"  # complete
        binary_path = '**/mozilla-central/obj-x86_64-pc-linux-gnu/dist/bin/firefox'

        self.options = Options()
        self.options.headless = False
        self.options.set_preference("dom.push.enabled", False)
        self.options.set_preference("dom.webnotifications.enabled", False)
        self.options.binary_location = binary_path
        # trace level is what makes the autofill lines appear in the log
        self.options.log.level = 'trace'
        self.log_file = './firefox_logs/log'+sys.argv[2]+'_'+timestr+'.txt'
        profile_path = '**/mozilla-central/obj-x86_64-pc-linux-gnu/tmp/'
        # seeded so a given ``ind`` always selects the same profile
        random.seed(ind)
        profile_nums = [str(i) for i in range(500)]
        ps = profile_path + 'profile'+random.choice(profile_nums)
        print(ps)

        try:
            self.driver = webdriver.Firefox(ps, desired_capabilities=caps, options=self.options,
                                            service_log_path=self.log_file)
        except Exception as e:
            print(e)

    # --------------------------------------------------------------
    # main function
    # visit page -> extract input/select fields -> generate browser logs -> calculate visibility of auto-fillable els
    # --------------------------------------------------------------
    def visit_page(self):
        """Run the whole pipeline for ``self.url`` and quit the browser."""
        if self.driver is None:
            print('driver was not started; skipping', self.url)
            return
        i = 0
        try:
            print('browsing page------', self.url)
            self.driver.get(self.url)
            self.detect_overlay()
            try:
                input_group = self.driver.find_elements_by_tag_name('input')
                input_group.reverse()
                i += 1
                print('page:', i, '# of inputs:', len(input_group))
                print('generating browsing logs---')
                for each_input in tqdm(input_group):
                    if each_input.get_attribute('type') not in self.types and each_input.tag_name == 'input':
                        continue
                    # Retry at a few scroll offsets; the first offset at
                    # which the element accepts the clicks wins.
                    for scroll_y in [-350, 0, 200]:
                        try:
                            self.driver.execute_script("arguments[0].scrollIntoView(true);", each_input)
                            self.driver.execute_script("window.scrollBy(arguments[0], arguments[1])", 0, scroll_y)
                            each_input.click()
                            each_input.click()
                            each_input.send_keys(Keys.DOWN)
                            # save the clickable els for detection part
                            self.clickable.append(each_input)
                            break
                        except Exception:
                            continue
                self.read_logs()
                if self.fields_filled:
                    self.process_el()
            except Exception as exc:
                # best effort per page, but no longer silent
                print(exc)
        except Exception as exc:
            print(exc)
        try:
            self.driver.quit()
        except Exception as exc:
            print(str(exc))

    # ------------------------------------------------
    # identify if the el is root elements
    # ------------------------------------------------
    def not_root(self, test_el):
        """Return True unless ``test_el`` is <body>, <html> or the document."""
        return test_el.tag_name not in ['body', 'html'] \
            and test_el != self.driver.execute_script('return document;')

    # ------------------------------------------------
    # find overlay/banner at the position
    # ------------------------------------------------
    def find_overlay(self, each_size, min_width, min_height, max_height):
        """Look for an overlay covering the point ``each_size[0:2]``.

        Starting from the element at that point, ancestors are examined
        until one is displayed, positioned fixed/absolute, has a numeric
        non-negative z-index and is at least ``min_width`` x ``min_height``.
        That candidate is handed to ``is_single``.

        Returns the element ``is_single`` confirms, or ``False``.
        ``max_height`` is accepted but not used.
        """
        test_el = self.driver.execute_script('return document.elementFromPoint(arguments[0], arguments[1]);',
                                             each_size[0], each_size[1])
        if test_el and self.not_root(test_el):
            overlay = ''
            # stops after the first candidate, whether confirmed or not
            while not overlay and self.not_root(test_el):
                css_display = test_el.value_of_css_property('display')
                css_visibility = self.driver.execute_script('return getComputedStyle(arguments[0]).visibility;', test_el)
                css_position = self.driver.execute_script('return getComputedStyle(arguments[0]).position;', test_el)
                css_zindex = self.driver.execute_script('return getComputedStyle(arguments[0]).zIndex;', test_el)
                test_width = self.driver.execute_script('return arguments[0].offsetWidth', test_el)
                test_height = self.driver.execute_script('return arguments[0].offsetHeight', test_el)
                if css_display != 'none' and css_visibility != "hidden" \
                        and any(word in css_position for word in ['fixed', 'absolute']) \
                        and css_zindex != 'auto' \
                        and test_width >= min_width and min_height <= test_height:
                    try:
                        zindex_val = int(css_zindex)
                        if zindex_val >= 0:
                            overlay = test_el
                            print('overlay ', css_display, css_visibility, css_position, css_zindex,
                                  test_width, test_height)
                            overlay_single = self.is_single(overlay)
                            if overlay_single:
                                print('identify-overlay', overlay.tag_name, overlay.get_attribute('name'),
                                      overlay.get_attribute('id'))
                                return overlay_single
                    except Exception as exc:
                        print(exc)
                test_el = test_el.find_element_by_xpath('..')
        return False

    # ------------------------------------------------
    # detect overlay at the center
    # detect banner at the bottom
    # ------------------------------------------------
    def detect_overlay(self):
        """Probe a few points of the viewport and hide overlays found there."""
        win_width = self.driver.execute_script('return window.innerWidth')
        win_height = self.driver.execute_script('return window.innerHeight')
        min_width = 100
        min_height = 100
        any_size = [[win_width / 2, win_height / 2, min_width, min_height],
                    [win_width / 2, win_height / 3, min_width, min_height]]
        # NOTE(review): the 3rd/4th entries of these lists are never read by
        # find_overlay, so the "full screen" probe uses the same 100x100
        # minimum as the others -- confirm whether that is intended.
        full_screen = [win_width / 2, win_height / 2, win_width - 50, win_height - 50]
        for each_size in any_size:
            overlay_el = self.find_overlay(each_size, min_width, min_height, win_height)
            if overlay_el:
                self.remove_overlay(overlay_el)  # remove immediately to detect more overlays
        overlay_el_full = self.find_overlay(full_screen, min_width, min_height, win_height)
        if overlay_el_full:
            self.remove_overlay(overlay_el_full)
        #bottom_min_width = win_width
        #bottom_min_height = 50
        #bottom_size = [win_width / 2, win_height-50, bottom_min_width, bottom_min_height]
        #banner = self.find_overlay(bottom_size, bottom_min_width, bottom_min_height, 150)
        # if banner:
        #     self.remove_overlay(banner)

    # ------------------------------------------------
    # identify if the el is a single instance
    # ------------------------------------------------
    def is_single(self, el):
        """Walk up from ``el`` to the first uniquely identifiable ancestor.

        An element counts as identifiable when it has an id, or else a name,
        class or tag name that matches exactly one element on the page.

        Returns that element, or ``False`` when the root is reached.
        """
        while self.not_root(el):
            # A missing attribute comes back as None; normalise so the XPath
            # concatenation below cannot raise.
            el_name = el.get_attribute('name') or ''
            el_id = el.get_attribute('id')
            el_class = el.get_attribute('class') or ''
            el_tag_name = el.tag_name
            try:
                if el_id:
                    print('overlay confirmed by id')
                    return el
                elif el_name:
                    if len(self.driver.find_elements_by_name(el_name)) == 1:
                        print('overlay confirmed by name')
                        return el
                elif el_class:
                    if len(self.driver.find_elements_by_xpath(
                            "//" + el_tag_name + "[@class='" + el_class + "'][@name='" + el_name + "']")) == 1:
                        print('overlay confirmed by class and name')
                        return el
                    elif len(self.driver.find_elements_by_xpath(
                            "//" + el_tag_name + "[@class='" + el_class + "']")) == 1:
                        print('overlay confirmed by class', el_class)
                        return el
                elif len(self.driver.find_elements_by_tag_name(el_tag_name)) == 1:
                    print('overlay confirmed by tag_name')
                    return el
            except Exception as exc:
                print(exc)
            # Always move to the parent, even after an error; otherwise the
            # same element would be retried forever.
            try:
                el = el.find_element_by_xpath('..')
            except Exception as exc:
                print(exc)
                return False
        return False

    # ------------------------------------------------
    # remove pop-up overlays or bottom banner
    # ------------------------------------------------
    def remove_overlay(self, overlay_el):
        """Hide ``overlay_el`` and re-enable scrolling on <html> and <body>.

        Each element's whole ``style`` attribute is replaced.
        """
        # set global CSS
        self.driver.execute_script("arguments[0].setAttribute('style', 'display:none !important');", overlay_el)
        print('overlay removed')
        html_el = self.driver.find_element_by_tag_name('html')
        body_el = self.driver.find_element_by_tag_name('body')
        self.driver.execute_script("arguments[0].setAttribute('style', 'overflow:auto !important');", html_el)
        self.driver.execute_script("arguments[0].setAttribute('style', 'overflow:auto !important');", body_el)

    # ------------------------------------------------
    # create dict for each field
    # ------------------------------------------------
    def create_dict(self, field_text, fill_flag):
        """Parse one log line into a dict describing a form field.

        Args:
            field_text: the log line.
            fill_flag: falsy for a "field matched" line (yields
                ``type_count``), truthy for a "preview" line (yields
                ``fill_val`` and ``fill_type``).

        Returns:
            Dict with ``id``, ``name``, ``class``, ``autocomplete``,
            ``hidden``, ``tag`` and either ``options_num`` (for SELECT) or
            ``value``, plus the mode-specific keys above.  For a matched
            line ``fill_type`` is included only when the line carries it.

        Raises:
            AttributeError: when a required marker is missing from the line.
        """
        patterns = self._FIELD_PATTERNS

        def grab(key):
            return patterns[key].search(field_text).group(0)

        field = {}
        if not fill_flag:
            field['type_count'] = grab('type_count')
            fill_type = patterns['fill_type'].search(field_text)
            if fill_type:
                field['fill_type'] = fill_type.group(0)
        else:
            field['fill_val'] = grab('fill_val')
            field['fill_type'] = grab('fill_type')
        for key in ('id', 'name', 'class', 'autocomplete', 'hidden', 'tag'):
            field[key] = grab(key)
        if field['tag'] == 'SELECT':
            field['options_num'] = grab('options_num')
        else:
            field['value'] = grab('value')
        return field

    # ------------------------------------------------
    # extract field info from driver logs
    # ------------------------------------------------
    def read_logs(self):
        """Parse the driver log into ``fields_all`` and ``fields_filled``.

        Duplicate lines are ignored.  When at least one filled field was
        found, the filled fields are written to ``firefox_autofill``.
        """
        line_seen = set()
        print('parsing logs----')
        autofill_num = 0
        elem_seen = set()
        with open(self.log_file, 'r', encoding="utf-8") as f_log:
            content = f_log.readlines()
        for each_line in content:
            # turn literal backslash escapes in the log into characters
            each_line_text = each_line.encode('utf-8').decode('unicode_escape')
            if each_line_text in line_seen:
                continue
            line_seen.add(each_line_text)
            if 'autofill-num--' in each_line_text:
                fill_num = self._AUTOFILL_NUM_PATTERN.search(each_line_text)
                if fill_num:
                    autofill_num += int(fill_num.group(0))
            if 'field-matched--' in each_line_text:
                field = self.create_dict(each_line_text, 0)
                if field and field not in self.fields_all:
                    self.fields_all.append(field)
                    print('matched--', field)
            if 'preview--' in each_line_text:
                filled = self.create_dict(each_line_text, 1)
                print('preview--', filled)
                if filled:
                    # identify an element by id, name, and type,
                    # in some cases, class of the same element can be different
                    filled_str = filled['id'] + filled['name'] + filled['fill_type']
                    if filled_str not in elem_seen:
                        elem_seen.add(filled_str)
                        self.fields_filled.append(filled)
        # the number can be different if the field has value
        print('all--', len(self.fields_all), 'autofill--', len(self.fields_filled),
              'autofilled_num--', autofill_num)
        if len(self.fields_filled) > 0:
            self.write_to_file(self.fields_filled, firefox_autofill)

    def locate_by_att(self, attr, value, attr_list, tag_list):
        """Find the single element whose ``attr`` ('id' or name) is ``value``.

        When the attribute alone is ambiguous, it is combined with each
        non-empty ``[attribute, value]`` pair of ``attr_list`` for every tag
        in ``tag_list``.  Returns the element, or ``False``.
        """
        if attr == 'id':
            attr_els = self.driver.find_elements_by_id(value)
        else:
            attr_els = self.driver.find_elements_by_name(value)
        if len(attr_els) == 1:
            print('element confirmed by ', attr)
            return attr_els[0]
        for attr_other in attr_list:
            for tag_name in tag_list:  # loop though possible tag_names
                if attr_other[1]:
                    attr_els = self.driver.find_elements_by_xpath(
                        "//" + tag_name + "[@" + attr + "='" + value + "']"
                        "[@" + attr_other[0] + "='" + attr_other[1] + "']")
                    if len(attr_els) == 1:
                        print('element confirmed by ', attr + ' and ', attr_other)
                        return attr_els[0]
        return False

    def locate_el(self, field):
        """Return the page element described by ``field``, or ``False``.

        Tries id first, then name, then class / autocomplete alone.
        """
        attr_list = [['class', field['class']], ['autocomplete', field['autocomplete']]]
        tag_list = ['input', 'select']  # autofillable tags by firefox
        try:
            if field['id']:
                el = self.locate_by_att('id', field['id'], attr_list, tag_list)
                if el:
                    return el
            elif field['name']:
                el = self.locate_by_att('name', field['name'], attr_list, tag_list)
                if el:
                    return el
            else:
                for attr in attr_list:
                    for tag_name in tag_list:
                        if attr[1]:
                            attr_els = self.driver.find_elements_by_xpath(
                                "//" + tag_name + "[@" + attr[0] + "='" + attr[1] + "']")
                            if len(attr_els) == 1:
                                print('element confirmed by ', attr)
                                return attr_els[0]
        except Exception as exc:
            print(exc)
        return False

    # ------------------------------------------------
    # determine hidden elem and hidden reason
    # ------------------------------------------------
    def calculate_visibility(self, input_el):
        """Classify ``input_el`` as 'visible' or one of the 'hid_*' reasons.

        The checks run in a fixed order and the first one that applies
        wins.  Returns '' when a check raised.
        """
        el = hidden.Detection(self.driver, input_el, self.clickable)
        el.print_out()
        visibility = ''
        try:
            if el.display and el.click:
                if el.size_hidden():
                    visibility = 'hid_size'
                else:
                    visibility = 'visible'
            else:
                if el.display_none_itself():
                    # display:none elem may return True for covered_up and off_screen function
                    visibility = 'hid_disp_none'
                elif el.display_none_parent():
                    visibility = 'hid_disp_none_parent'
                elif el.visibility_hidden_itself():
                    visibility = 'hid_visi_hidden'
                elif el.visibility_hidden_parent():
                    visibility = 'hid_visi_hidden_parent'
                elif el.size_hidden():
                    visibility = 'hid_size'
                elif el.off_screen():
                    visibility = 'hid_off_screen'
                elif el.transparent_itself():
                    visibility = 'hid_transparent'
                elif el.transparent_parent():
                    visibility = 'hid_transparent_parent'
                elif el.clip_path_hidden():
                    visibility = 'hid_clip_path'
                elif el.ancestor_overflow_hidden():
                    visibility = 'hid_off_parents_overflow'
                elif el.clip_hidden():
                    visibility = 'hid_clipped_by_parent'
                elif el.covered_up():
                    visibility = 'hid_covered'
                elif not el.display and not el.click:
                    visibility = 'hid_other_reason'
                else:
                    visibility = 'visible'
        except Exception as exc:
            print(exc)
        return visibility

    # ------------------------------------------------
    # compute visibility for filled and matched fields and write results
    # ------------------------------------------------
    def process_el(self):
        """Compute visibility for filled and matched fields; write results.

        Filled fields are written to ``firefox_invisible`` only when the
        page has both a hidden filled field and a visible non-SELECT one.
        All matched fields that can be located are written to
        ``firefox_all_visibility``.
        """
        hid_flag = 0
        vis_flag = 0
        hid_types = set()
        self.fields_filled.reverse()
        # work on a copy so the visibility/comments keys do not leak back
        fields_filled = copy.deepcopy(self.fields_filled)
        for position, field in enumerate(fields_filled):
            print('---', position + 1)
            input_el = self.locate_el(field)
            if not input_el:
                continue
            field['visibility'] = self.calculate_visibility(input_el)
            if field['visibility'] != 'visible':
                hid_flag = 1
                hid_types.add(field['fill_type'])
                field = self.extract_comments(input_el, field)
                self.driver.get_screenshot_as_file('screenshots/' + sys.argv[2] + '_' + timestr + '.png')
            elif field['visibility'] == 'visible' and field['tag'] != 'SELECT':
                vis_flag = 1
            self.invisible.append(field)
            print(field['visibility'], field)
        if vis_flag and hid_flag:
            self.write_to_file(self.invisible, firefox_invisible)

        all_visibility = []
        self.fields_all.reverse()
        print('start looking for corresponding visible ones')
        for index, field in enumerate(self.fields_all):
            # if field['fill_type'] not in hid_types:
            #     continue
            print('---', index + 1)
            # if field not in self.fields_filled:
            input_el = self.locate_el(field)
            if not input_el:
                continue
            field['visibility'] = self.calculate_visibility(input_el)
            if field not in all_visibility:
                all_visibility.append(field)
            print(field['visibility'], field)
        self.write_to_file(all_visibility, firefox_all_visibility)

    def write_to_file(self, data, output_file):
        """Append one JSON line ``{"inputs": data, "url": ...}`` and flush."""
        output = {}
        output['inputs'] = data
        output['url'] = self.url
        output_file.write(json.dumps(output, ensure_ascii=False) + '\n')
        output_file.flush()

    def extract_comments(self, el, field):
        """Attach HTML comments found just above ``el`` to ``field``.

        For every page-source line containing the element's outerHTML, the
        up-to-five preceding lines are searched for ``<!-- ... -->``
        comments; a non-empty result is stored under ``field['comments']``.
        Returns ``field``.
        """
        html_source = str(self.driver.page_source)
        source_ls = html_source.split('\n')
        el_html = el.get_attribute('outerHTML')
        for i, source_line in enumerate(source_ls):
            if el_html in source_line:
                start_line = 0 if i < 5 else (i - 5)
                comments_ls = self._COMMENT_PATTERN.findall(' '.join(source_ls[start_line:i]))
                if comments_ls:
                    field['comments'] = comments_ls
        return field
def scrape(dates, region, browser_viz): """ Scrape current and forecast conditions and problem text from AvalancheCanada.ca historical page """ # empty list for current, current+1, current+2 forecast conditions and text problems current_conditions = [] current_plus_1_conditions = [] current_plus_2_conditions = [] problem_conditions = [] # webdriver options options = Options() if browser_viz == 'Yes': options.headless = False else: options.headless = True # initialize selenium web driver base_url = 'https://www.avalanche.ca/forecasts/archives/{}/{}' driver = webdriver.Firefox(options=options) driver.get(base_url.format(region, dates[0])) time.sleep(3) # slight pause for driver to load # scrape avalanche canada data for each date for date in dates: date_plus_1 = (pd.to_datetime(date) + pd.Timedelta('1 days')).strftime('%Y-%m-%d') # tomorrows date date_plus_2 = (pd.to_datetime(date) + pd.Timedelta('2 days')).strftime('%Y-%m-%d') # day after tomorrows date # locate relevant avalanche data in web driver, this is hard coded to the page JavaScript alpine_element = driver.find_elements_by_xpath( "//*[@id='app']//*[@transform = 'translate(385 211)']//*[@x = '70']") treeline_element = driver.find_elements_by_xpath( "//*[@id='app']//*[@transform = 'translate(405 261)']//*[@x = '70']") belowtree_element = driver.find_elements_by_xpath( "//*[@id='app']//*[@transform = 'translate(425 311)']//*[@x = '70']") forecast_element = driver.find_elements_by_xpath( "//*[@id='app']//*[@class='_2tSd']//*[@class='Xgfa undefined _2j-o _2iRE']") problem_element = driver.find_elements_by_xpath("//*[@id='app']//*[@class='_1rb7']") # if no conditions exist then insert empty to list if not alpine_element or not treeline_element or not belowtree_element or not forecast_element: current_conditions.append([date]) current_plus_1_conditions.append([date_plus_1]) current_plus_2_conditions.append([date_plus_2]) problem_conditions.append([]) else: # if conditions exist then parse data alpine_conditions = 
[condition.text for condition in alpine_element] treeline_conditions = [condition.text for condition in treeline_element] belowtree_conditions = [condition.text for condition in belowtree_element] future_conditions = [condition.text for condition in forecast_element] problems = [prob.text for prob in problem_element] # store data for current and forecast conditions and problem text, split based on output format from page current_conditions.append([date, alpine_conditions[0].split(' - ')[-1], int(alpine_conditions[0].split(' - ')[0]), treeline_conditions[0].split(' - ')[-1], int(treeline_conditions[0].split(' - ')[0]), belowtree_conditions[0].split(' - ')[-1], int(belowtree_conditions[0].split(' - ')[0])]) current_plus_1_conditions.append( [date_plus_1, future_conditions[0].split(' - ')[-1], int(future_conditions[0].split(' - ')[0]), future_conditions[1].split(' - ')[-1], int(future_conditions[1].split(' - ')[0]), future_conditions[2].split(' - ')[-1], int(future_conditions[2].split(' - ')[0])]) current_plus_2_conditions.append( [date_plus_2, future_conditions[3].split(' - ')[-1], int(future_conditions[3].split(' - ')[0]), future_conditions[4].split(' - ')[-1], int(future_conditions[4].split(' - ')[0]), future_conditions[5].split(' - ')[-1], int(future_conditions[5].split(' - ')[0])]) problem_conditions.append(problems) # slight pause for driver to load driver.get(base_url.format(region, date_plus_1)) time.sleep(5) driver.quit() # close selenium driver return current_conditions, current_plus_1_conditions, current_plus_2_conditions, problem_conditions
def scrape_news():
    """Search Google News for the configured topic and write results to a file.

    Reads ``driver`` and ``search_topic`` from the ``your_settings`` section
    of ``config``, starts the matching headless browser (geckodriver or
    chromedriver, expected in the current directory), and writes each news
    heading followed by its link to ``newsletter_file``.

    Returns None. If the configured driver is not supported, an error is
    printed and nothing else happens.
    """
    # get user settings
    driver = config.get('your_settings', 'driver')
    search_topic = config.get('your_settings', 'search_topic')

    # set up driver
    PATH_TO_DRIVER = "./%s" % driver
    if driver == 'geckodriver':
        firefox_options = Options_firefox()
        # run in headless mode
        firefox_options.headless = True
        # disable cookies to prevent popups
        firefox_pref = webdriver.FirefoxProfile()
        firefox_pref.set_preference("network.cookie.cookieBehavior", 2)
        browser = webdriver.Firefox(executable_path=PATH_TO_DRIVER,
                                    options=firefox_options,
                                    firefox_profile=firefox_pref)
    elif driver == 'chromedriver':
        chrome_options = Options_chrome()
        # run in headless mode
        chrome_options.add_argument('--headless')
        # disable cookies to prevent popups
        chrome_options.add_experimental_option(
            'prefs', {'profile.default_content_setting_values.cookies': 2})
        browser = webdriver.Chrome(executable_path=PATH_TO_DRIVER,
                                   options=chrome_options)
    else:
        print('ERROR: driver not supported')
        # No browser was created; continuing would fail on an undefined name.
        return

    try:
        print('Getting search results...')
        # open URL
        browser.get('https://google.com')
        # select google search bar
        google_search = browser.find_element_by_name('q')
        # type news topic to search
        google_search.send_keys(search_topic)
        google_search.send_keys(Keys.ENTER)
        # implicitly_wait sets the element-lookup timeout; it does not sleep
        browser.implicitly_wait(5)
        browser.find_element_by_css_selector('a[data-sc="N"]').click()
        browser.implicitly_wait(5)

        # get all elements containing news title
        all_headings = browser.find_elements_by_xpath(
            '//div[contains(@role, "heading") and contains(@aria-level, "2")]')
        # get all elements containing links for each news title
        all_links = browser.find_elements_by_xpath('//g-card/div/div/div[2]/a')

        # open file for writing; the context manager flushes and closes it
        with open(newsletter_file, 'w') as out_file:
            # loop over each title and link, print each to the file
            for heading, link in zip(all_headings, all_links):
                out_file.write('\n\n')
                out_file.write(heading.text)
                out_file.write('\n')
                out_file.write(link.get_attribute('href'))
    finally:
        # quit() ends the whole session (close() only closes the window)
        browser.quit()

    print('Done. Search results exported to "newsletter.txt"')
def setUpClass(cls):
    """Create the headless browser stored on the class as ``cls.browser``."""
    super().setUpClass()

    headless_options = Options()
    headless_options.headless = True

    # Keep the driver on the class; element lookups get a 10 second
    # implicit wait.
    cls.browser = WebDriver(options=headless_options)
    cls.browser.implicitly_wait(10)
"авг 2018_bets", "сен 2018_profit", "сен 2018_bets", "окт 2018_profit", "окт 2018_bets", "ноя 2018_profit", "ноя 2018_bets", "дек 2018_profit", "дек 2018_bets", "янв 2019_profit", "янв 2019_bets", "фев 2019_profit", "фев 2019_bets", "март 2019_profit", "март 2019_bets", "апр 2019_profit", "апр 2019_bets", "май 2019_profit", "май 2019_bets", "июнь 2019_profit", "июнь 2019_bets", "июль 2019_profit", "июль 2019_bets", "авг 2019_profit", "авг 2019_bets", "сен 2019_profit", "сен 2019_bets", "окт 2019_profit", "окт 2019_bets", "ноя 2019_profit", "ноя 2019_bets", "дек 2019_profit", "дек 2019_bets", "янв 2020_profit", "янв 2020_bets", "фев 2020_profit", "фев 2020_bets", "март 2020_profit", "март 2020_bets", "апр 2020_profit", "апр 2020_bets", "май 2020_profit", "май 2020_bets", "июнь 2020_profit", "июнь 2020_bets", "июль 2020_profit", "июль 2020_bets" ] # web driver call options = Options() options.headless = False browser = webdriver.Firefox(options=options) browser.get('https://vprognoze.ru/') cookies = [{ 'name': 'rerf', 'value': 'AAAAAF8J3vmD0NVBAwW4Ag==', 'path': '/', 'domain': 'vprognoze.ru', 'secure': False, 'httpOnly': False, 'expiry': 1597074425 }, { 'name': 'ipp_uid', 'value': '1594482425332/RgXs8PxlChvqPFvX/eHxM4v/yJsnvA1c4leNDmQ==', 'path': '/',
def scrape(self):
    """Scrape After Hours Variance (AHV) permits for every BIN in MNCD1BINS.csv.

    For each BIN (first column of each row of ``MNCD1BINS.csv``) the BIS
    query page is opened in one headless Firefox; every reference number
    listed there (following the ``next`` button across result pages) is
    opened in a second headless Firefox and parsed with BeautifulSoup.
    One row per permit is written to ``After_Hours_Variances.csv``.

    Fields that cannot be located are written as the string ``'null'``.
    Returns None.
    """

    def checkbox_state(variance_rows, row_index):
        """Return "yes"/"no" for the checkbox image(s) in the given row.

        The last image in the cell decides, as before; 'null' is returned
        when the cell has no image, instead of reusing the previous
        permit's value (or failing on an unassigned name).
        """
        state = 'null'
        for image in variance_rows[row_index].findAll('td')[1].findAll('img'):
            state = "yes" if image['src'] == "images/box_check.gif" else "no"
        return state

    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Firefox(firefox_options=options)  # permit detail pages
    driver2 = None  # BIN query / result list pages
    try:
        driver2 = webdriver.Firefox(firefox_options=options)
        # Create CSV file; both files are closed when the block exits
        with open("After_Hours_Variances.csv", "w") as out_file, \
                open('MNCD1BINS.csv') as csvfile:
            f = csv.writer(out_file)
            # Write column headers as the first line
            f.writerow([
                "AHVURL", "jobNumber", "referenceNumber", "status", "entryDate",
                "filingType", "houseNumber", "streetName", "borough", "BIN",
                "name", "businessName", "licenseNumber", "nearResidence",
                "enclosedBuilding", "demolition", "crane", "requested",
                "approved", "startDay", "days", "hoursFrom", "hoursTo",
                "reason", "approvedReason", "description"
            ])

            for bin_row in csv.reader(csvfile):
                if not bin_row:
                    continue  # skip blank lines in the BIN list
                AHVBINURL = ('http://a810-bisweb.nyc.gov/bisweb/AHVPermitsQueryByNumberServlet?requestid=1&allkey='
                             + str(bin_row[0]) + '&fillerdata=A')
                driver2.get(AHVBINURL)
                print(AHVBINURL)
                while True:
                    BINHTML = driver2.page_source
                    AHVBINSoup = BeautifulSoup(BINHTML, "lxml")
                    print(AHVBINSoup.title.string)
                    AHVReferenceNumberTable = AHVBINSoup.findAll('table')[3]
                    print('starting page')
                    for row in AHVReferenceNumberTable.findAll('tr')[1:]:
                        referenceNo = row.findAll('td')[0].a.string
                        AHVURL = "http://a810-bisweb.nyc.gov/bisweb/AHVPermitDetailsServlet?requestid=2&allkey=" + referenceNo
                        print(AHVURL)
                        driver.get(AHVURL)
                        AHVHTML = driver.page_source
                        AHVSoup = BeautifulSoup(AHVHTML, "lxml")
                        print(AHVSoup.title.string)

                        # The detail page layout is addressed by table position.
                        tables = AHVSoup.findAll('table')
                        AHVTablePremises = tables[2]
                        AHVTableFiling = tables[3]
                        AHVTableLocation = tables[4]
                        AHVTableContractor = tables[5]
                        AHVTableVariance = tables[6]

                        premises_rows = AHVTablePremises.findAll('tr')
                        jobNumber = premises_rows[0].findAll('td')[1].a.string
                        referenceNumber = premises_rows[1].findAll(
                            'td')[1].string.replace('Reference Number: ', "")

                        findStatus = AHVTableFiling.find('td', text='Status:')
                        status = findStatus.findNext('td').string
                        filing_rows = AHVTableFiling.findAll('tr')
                        entryDate = filing_rows[4].findAll('td')[5].string
                        filingType = filing_rows[5].findAll('td')[3].string

                        location_rows = AHVTableLocation.findAll('tr')
                        houseNumber = location_rows[2].findAll('td')[1].string
                        streetName = location_rows[2].findAll('td')[3].string
                        borough = location_rows[3].findAll('td')[1].string
                        BIN = location_rows[3].findAll('td')[7].a.string

                        contractor_rows = AHVTableContractor.findAll('tr')
                        name = contractor_rows[2].findAll('td')[1].string
                        businessName = contractor_rows[3].findAll('td')[1].string
                        licenseNumber = contractor_rows[6].findAll('td')[3].a.string

                        variance_rows = AHVTableVariance.findAll('tr')
                        nearResidence = checkbox_state(variance_rows, 2)
                        enclosedBuilding = checkbox_state(variance_rows, 3)
                        demolition = checkbox_state(variance_rows, 4)
                        crane = checkbox_state(variance_rows, 5)

                        requested = variance_rows[6].findAll(
                            'td')[0].text.replace(u'\xa0', "").replace(
                                'Total Days Requested:', "").encode('utf-8')
                        approved = variance_rows[7].findAll(
                            'td')[0].text.replace(u'\xa0', "").replace(
                                'Total Days Approved:', "").encode('utf-8')

                        schedule_cells = variance_rows[10].findAll('td')
                        startDay = schedule_cells[0].string
                        days = schedule_cells[1].string
                        # The hour columns are optional on the page.
                        try:
                            hoursFrom = schedule_cells[2].string
                        except IndexError:
                            hoursFrom = 'null'
                        try:
                            hoursTo = schedule_cells[3].string
                        except IndexError:
                            hoursTo = 'null'

                        findReason = AHVTableVariance.findAll(
                            'b', text=re.compile('Apply Reason:.*'))[0]
                        reason = findReason.parent.text.replace(
                            u'\xa0', "").replace('Apply Reason:',
                                                 "").strip().encode('utf-8')
                        try:
                            findApprovedReason = AHVTableVariance.findAll(
                                'b', text=re.compile('Approved:.*'))[1]
                            approvedReason = findApprovedReason.parent.text.replace(
                                u'\xa0', "").replace('Approved:',
                                                     "").strip().encode('utf-8')
                        except IndexError:
                            approvedReason = 'null'
                        # find()/findNext() return None when nothing matches,
                        # which surfaces as AttributeError, not IndexError.
                        try:
                            findDescription = AHVTableVariance.find(
                                'td', text='Description of Work:')
                            description = findDescription.findNext('td').string
                        except (AttributeError, IndexError):
                            description = 'null'

                        f.writerow([
                            AHVURL, jobNumber, referenceNumber, status,
                            entryDate, filingType, houseNumber, streetName,
                            borough, BIN, name, businessName, licenseNumber,
                            nearResidence, enclosedBuilding, demolition, crane,
                            requested, approved, startDay, days, hoursFrom,
                            hoursTo, reason, approvedReason, description
                        ])
                    print('page complete')
                    # Follow pagination until there is no "next" button.
                    try:
                        nextPageElem = driver2.find_element_by_name('next')
                        print('success')
                    except NoSuchElementException:
                        break
                    nextPageElem.click()
                    print('clicked')
    finally:
        # Always shut down both browsers, even when scraping fails midway.
        driver.quit()
        if driver2 is not None:
            driver2.quit()
class EkosSelenium:
    '''Class for accessing and downloading items from Ekos using Selenium Webdriver

    The configuration file is read and a headless Firefox session is started
    when the class body is executed; all instances share that one browser
    through the ``browser`` class attribute.
    '''

    # Config
    conf_file = './DeliveryFormat/config_EXAMPLE.yaml'  # PATH to config file
    # Read the config and close the file handle straight away.
    with open(conf_file, 'r') as stream:
        config = yaml.safe_load(stream)

    # Firefox Settings. Need to import from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
    gPATH = config['gPATH']

    # FIREFOX PROFILE - PREVENTS DOWNLOAD DIALOGS
    profile = FirefoxProfile()
    profile.set_preference("browser.download.folderList",
                           2)  # set download location as custom dir
    profile.set_preference(
        "browser.download.dir",
        config['PATH'])  # sets custom dir - NEED PATH ON LOCAL MACHINE
    profile.set_preference("browser.helperApps.neverAsk.openFile",
                           "text/csv,application/vnd.ms-excel")
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                           "text/csv,application/vnd.ms-excel")

    # Firefox Options - Allows Firefox to run in headless mode
    options = Options()
    options.add_argument('-headless')
    browser = webdriver.Firefox(
        firefox_profile=profile,
        executable_path=gPATH,  # Need executable_path for cron
        firefox_options=options)

    @staticmethod
    def _accept_alert_if_present(browser, timeout=3):
        '''Wait up to ``timeout`` seconds for an alert and accept it if one appears.'''
        try:
            WebDriverWait(browser, timeout).until(EC.alert_is_present())
        except TimeoutException:
            logger.info("No Alert")
            return
        # ``switch_to.alert`` is a property, not a method: calling it raises
        # TypeError as soon as an alert is actually present.
        browser.switch_to.alert.accept()
        logger.info("Alert Accepted")

    def login(self, username, password):
        '''logs in to Ekos using credentials provided by user
        handle any alerts that may occur during log in'''
        # open webdriver, go to Ekos login page
        logger.info('Logging in to Ekos')
        browser = EkosSelenium.browser
        browser.get('https://login.goekos.com/default.aspx')
        assert "Ekos" in browser.title

        # enter login credentials and log in
        elem = browser.find_element_by_id('txtUsername')
        elem.send_keys(username)
        elem = browser.find_element_by_id('txtPassword')
        elem.send_keys(password)
        elem.send_keys(Keys.RETURN)

        # handle alert that might occur upon login
        self._accept_alert_if_present(browser)
        logger.info("Login Successful")

    def download_report(self, reportname):
        '''Clicks Report link and downloads report provided by user

        ``reportname`` is the link text of the report. Returns the
        ``datetime`` captured just before the CSV export button is clicked.
        The download is retried after a missing frame, an intercepted click
        or an unexpected alert; an insecure-certificate error ends the
        browser session and is re-raised.
        '''
        browser = EkosSelenium.browser
        self._accept_alert_if_present(browser)

        # Get and click on Reports Tab
        elem = WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable((By.LINK_TEXT, 'Reports')))
        elem.click()

        # Click reportname link
        while True:
            try:
                logger.info("Downloading %s as csv" % str(reportname))
                elem = WebDriverWait(browser, 10).until(
                    EC.element_to_be_clickable((By.LINK_TEXT, reportname)))
                elem.click()

                # download csv (NOTE: explicit time.sleep calls were found
                # useful here when running with Chrome)
                browser.implicitly_wait(10)
                browser.switch_to.frame('formFrame_0')
                elem = WebDriverWait(browser, 10).until(
                    EC.element_to_be_clickable(
                        (By.CLASS_NAME, 'buttonGroupInner')))
                elem.click()
                dltime = datetime.today()  # capture time of download
                elem = WebDriverWait(browser, 10).until(
                    EC.element_to_be_clickable((By.ID, 'csv_export')))
                elem.click()

                # close iframe
                browser.switch_to.default_content()
                elem = browser.find_element_by_class_name('formClose')
                elem.click()
            except NoSuchFrameException:
                logger.warning('NoSuchFrameException: Restarting DL process')
            except ElementClickInterceptedException:
                logger.warning(
                    'ElementClickInterceptedException: Closing iframe')
                browser.switch_to.default_content()
                elem = browser.find_element_by_class_name('formClose')
                elem.click()
            except InsecureCertificateException:
                logger.warning(
                    'Insecure Certificate Exception. Ending browser session')
                self.browser.quit()
                # The session is gone, so retrying cannot succeed.
                raise
            except UnexpectedAlertPresentException:
                browser.switch_to.alert.accept()
                logger.warning('Unexpected Alert Accepted')
            else:
                break
        return dltime

    def quit(self):
        '''quits webdriver'''
        EkosSelenium.browser.quit()
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# Firefox options for the session; headless mode is left disabled below.
options = Options()
#options.headless = True;
# Start Firefox through the geckodriver binary at the given path.
driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', firefox_options=options)
# Open the Naver home page.
driver.get('https://www.naver.com')
def HeadlessBrowserInstance1():
    """Capture a screenshot of the next URL and record it in the database.

    Steps: fetch a (URL, DOMAIN) record with ``FetchRecord``, set ``FLAG1``
    for that URL so another browser instance works on a different URL, load
    the URL in headless Firefox, save the screenshot as a JPEG named after
    the MD2 hash of the URL under ``ScreenShots/<domain>/``, and update
    ``FLAG``, ``HASH`` and ``PATH`` for the URL.

    Database, page-load and image errors are printed and otherwise ignored
    (best effort). Returns None.
    """
    # FetchRecord returns the URL and DOMAIN to work on
    Record = FetchRecord()
    url = Record[0]
    domain = Record[1]
    flag1 = int(1)
    try:
        # Updating FLAG1 in database for specific URL so another browser instance works on different URL
        updateflag1 = '''update Test set FLAG1=? where URL=?'''
        ExecuteQuery.execute(updateflag1, (flag1, url))
        DatabaseConnect.commit()
    except Exception as e:
        print(e)

    # --headless forces browser instance to work in headless mode
    parameters = Options()
    parameters.add_argument("--headless")
    # creating webdriver for firefox by passing parameters and the Gecko web driver path
    browser = webdriver.Firefox(firefox_options=parameters,
                                executable_path=r'C:\\webdriver\\geckodriver')
    try:
        # maximizing window of headless firefox
        browser.maximize_window()
        try:
            # Passing URL in headless firefox
            browser.get(url)
        except Exception as e:
            print(e)
        # Capturing screenshot of webpage as PNG
        capturescreen = browser.get_screenshot_as_png()
    finally:
        # The browser is only needed for the screenshot; always shut it down.
        browser.quit()

    # generating unique MD2 hash of the URL, used as the file name
    SHAVALUE = MD2.new(str.encode(url))
    url_hash = str(SHAVALUE.hexdigest())

    # quality is a parameter used while converting png to jpeg
    quality = 100
    # converting PNG bytes into an RGB image that can be stored as jpeg
    img = None
    try:
        img = Image.open(io.BytesIO(capturescreen))
        img = img.convert("RGB")
    except Exception as e:
        print(e)

    # creating subfolder with domain as naming convention under ScreenShots folder
    domain_dir = os.path.join(r".\\ScreenShots\\", str(domain))
    try:
        if not os.path.isdir(domain_dir):
            os.makedirs(domain_dir)
    except Exception as e:
        print(e)

    # StoragePath stores path for that particular domain
    StoragePath = os.path.join(r'.\\ScreenShots', str(domain) + '\\')
    if img is not None:
        # saving image once, by specifying quality and enabling optimization
        # (Pillow's keyword is ``optimize``; ``optimise`` is silently ignored)
        try:
            img.save(str(StoragePath) + url_hash + '.jpeg',
                     'JPEG',
                     quality=quality,
                     optimize=True)
        except Exception as e:
            print(e)

    flag = int(1)
    Location = str(StoragePath) + url_hash + '.jpeg'
    # strip() removes leading/trailing '.' and '\' characters
    Location = Location.strip(".\\")

    # updating record in database by updating flags, hash and path of that image
    try:
        updateHASH = (
            '''UPDATE "Test" SET FLAG=?, HASH=?, PATH=? WHERE URL = ?''')
        ExecuteQuery.execute(updateHASH, (flag, url_hash, Location, url))
        DatabaseConnect.commit()
    except Exception as e:
        print(e)
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException

# Start Firefox with default options through the local geckodriver binary.
opts = Options()
browser = Firefox(executable_path=r"C:\geckodriver-v0.27.0\geckodriver.exe", options=opts)
# implicitly_wait sets the element-lookup timeout (seconds); it does not pause.
browser.implicitly_wait(3)
browser.get('<url>')

# Log in: type the password into the 'adminpw' field and submit its form.
search_form = browser.find_element_by_name('adminpw')
search_form.send_keys('<password>')
search_form.submit()

# Click the 'discardalldefersp' control and submit, when the page has one.
# NOTE(review): presumably this discards all deferred messages - confirm.
try:
    field = browser.find_element_by_name('discardalldefersp')
    field.click()
    browser.implicitly_wait(3)
    submit = browser.find_element_by_name('submit')
    submit.click()
except NoSuchElementException:
    print('No new messages to be discarded')
browser.implicitly_wait(3)

# Collect inputs with value '3', table cells containing '@', and inputs
# whose name contains 'ban-'.
fields = browser.find_elements_by_xpath("//input[@value='3']")
emails = browser.find_elements_by_xpath('//td[contains(text(),"@")]')
banfields = browser.find_elements_by_xpath('//input[contains(@name,"ban-")]')
if len(fields) == 0:
    print('No new requests to be discarded, closing browser')
from selenium import webdriver from selenium.webdriver.firefox.options import Options from fake_useragent import UserAgent import os opts = Options() opts.set_headless() assert opts.headless ### In this example i will use a range of numbered pages to extract info def scrapper(path, rangeofnumbered, iplist, geckopath, waitingtime=1, interestedpattern, vpnon, vpnoff): ''' This Web Scrapper is useful even with Distil Network protection :param path: str the domain of the page of interested :param rangeofnumbered: range Range of the numbered pages of interested :param dateobtained: tuple The data of interested :param iplist: list list of ips that you want to use :param geckopath: str the gecko driver path in your computer :param waitingtime: int seconds to wait for any visit default = 1
def test_to_capabilities(self):
    """Check that each option kind shows up under ``moz:firefoxOptions``.

    Starts from default options (equal to the stock Firefox capabilities)
    and then adds a profile, an argument, a binary and a preference,
    verifying the generated capabilities after each step.
    """
    firefox_key = "moz:firefoxOptions"
    opts = Options()
    assert opts.to_capabilities() == DesiredCapabilities.FIREFOX

    # profile is serialised as its encoded string
    profile = FirefoxProfile()
    opts.profile = profile
    caps = opts.to_capabilities()
    assert firefox_key in caps
    firefox_caps = caps[firefox_key]
    assert "profile" in firefox_caps
    assert isinstance(firefox_caps["profile"], basestring)
    assert firefox_caps["profile"] == profile.encoded

    # command line arguments are passed through as a list
    opts.add_argument("--foo")
    caps = opts.to_capabilities()
    assert firefox_key in caps
    firefox_caps = caps[firefox_key]
    assert "args" in firefox_caps
    assert firefox_caps["args"] == ["--foo"]

    # binary is serialised as its start command
    binary = FirefoxBinary()
    opts.binary = binary
    caps = opts.to_capabilities()
    assert firefox_key in caps
    firefox_caps = caps[firefox_key]
    assert "binary" in firefox_caps
    assert isinstance(firefox_caps["binary"], basestring)
    assert firefox_caps["binary"] == binary._start_cmd

    # preferences are collected in a dict
    opts.set_preference("spam", "ham")
    caps = opts.to_capabilities()
    assert firefox_key in caps
    firefox_caps = caps[firefox_key]
    assert "prefs" in firefox_caps
    assert isinstance(firefox_caps["prefs"], dict)
    assert firefox_caps["prefs"]["spam"] == "ham"
def __init__(
        self,
        client="firefox",
        username="******",
        proxy=None,
        command_executor=None,
        loadstyles=False,
        profile=None,
        headless=False,
        autoconnect=True,
        logger=None,
        extra_params=None,
        chrome_options=None,
        executable_path=None,
):
    """Initialises the webdriver

    Parameters:
        client: "firefox", "chrome" or "remote" (case-insensitive).
        username: stored on the instance as ``self.username``.
        proxy: optional proxy; applied with ``self.set_proxy`` for Firefox
            and as ``--proxy-server`` for Chrome.
        command_executor: passed to ``webdriver.Remote`` for the remote
            client.
        loadstyles: Firefox only; when false, stylesheets, images and the
            Flash plugin are disabled through profile preferences.
        profile: optional path to an existing browser profile; it must
            exist.
        headless: run Firefox/Chrome without a visible window.
        autoconnect: call ``self.connect()`` once the driver is ready.
        logger: logger to use instead of the existing ``self.logger``.
        extra_params: dict of extra keyword arguments for the webdriver
            constructor.
        chrome_options: iterable of extra Chrome command line arguments.
        executable_path: Firefox only; driver executable path (made
            absolute). It takes precedence over an ``executable_path``
            entry in ``extra_params``.

    Raises:
        WhatsAPIException: if ``profile`` does not exist or ``client`` is
            not a supported value.
    """
    self.logger = logger or self.logger
    extra_params = extra_params or {}

    if profile is not None:
        self._profile_path = profile
        self.logger.info("Checking for profile at %s" % self._profile_path)
        if not os.path.exists(self._profile_path):
            self.logger.critical("Could not find profile at %s" % profile)
            raise WhatsAPIException("Could not find profile at %s" % profile)
    else:
        self._profile_path = None

    self.client = client.lower()
    if self.client == "firefox":
        if self._profile_path is not None:
            self._profile = webdriver.FirefoxProfile(self._profile_path)
        else:
            self._profile = webdriver.FirefoxProfile()
        if not loadstyles:
            # Disable CSS
            self._profile.set_preference("permissions.default.stylesheet", 2)
            # Disable images
            self._profile.set_preference("permissions.default.image", 2)
            # Disable Flash
            self._profile.set_preference(
                "dom.ipc.plugins.enabled.libflashplayer.so", "false")
        if proxy is not None:
            self.set_proxy(proxy)

        options = Options()
        if headless:
            options.set_headless()
        options.profile = self._profile

        capabilities = DesiredCapabilities.FIREFOX.copy()
        capabilities["webStorageEnabled"] = True

        # Copy so the caller's dict is never modified.
        driver_kwargs = dict(extra_params)
        if executable_path is not None:
            driver_kwargs["executable_path"] = os.path.abspath(executable_path)

        self.logger.info("Starting webdriver")
        self.driver = webdriver.Firefox(capabilities=capabilities,
                                        options=options,
                                        **driver_kwargs)

    elif self.client == "chrome":
        self._profile = webdriver.ChromeOptions()
        if self._profile_path is not None:
            self._profile.add_argument("user-data-dir=%s" % self._profile_path)
        if proxy is not None:
            self._profile.add_argument("--proxy-server=%s" % proxy)
        if headless:
            self._profile.add_argument("headless")
        if chrome_options is not None:
            for option in chrome_options:
                self._profile.add_argument(option)
        self.logger.info("Starting webdriver")
        self.driver = webdriver.Chrome(chrome_options=self._profile,
                                       **extra_params)

    # Compare the lower-cased value, like the other branches, so that
    # e.g. "Remote" is accepted too.
    elif self.client == "remote":
        if self._profile_path is not None:
            self._profile = webdriver.FirefoxProfile(self._profile_path)
        else:
            self._profile = webdriver.FirefoxProfile()
        capabilities = DesiredCapabilities.FIREFOX.copy()
        self.driver = webdriver.Remote(
            command_executor=command_executor,
            desired_capabilities=capabilities,
            **extra_params
        )

    else:
        # No driver can be created; fail here with a clear message rather
        # than later on the missing ``self.driver``.
        self.logger.error("Invalid client: %s" % client)
        raise WhatsAPIException("Invalid client: %s" % client)

    self.username = username
    self.wapi_functions = WapiJsWrapper(self.driver, self)

    self.driver.set_script_timeout(500)
    self.driver.implicitly_wait(10)

    if autoconnect:
        self.connect()
empty_space.click().perform() driver.find_element_by_xpath("/html/body/div[8]/ul/li[1]/a").click() driver.find_element_by_link_text("予定を登録する").click() time.sleep(2) except: calenderSpace() userRecords = getSalonboarUser() url = "https://salonboard.com/login/" # username = "******" # password = "******" # username = sb_username # password = sb_password options = Options() options.add_argument('-headless') # driver = webdriver.Firefox(executable_path=r'/usr/local/bin/geckodriver') driver = webdriver.Firefox(executable_path=r'/usr/local/bin/geckodriver', options=options) print("before driver") if __name__ == "__main__": #driver = webdriver.Firefox() #driver = webdriver.Remote(command_executor='http://localhost:4444/wd/hub', desired_capabilities=webdriver.DesiredCapabilities.FIREFOX) print(type(int(reservation_type))) if (username != '' and password != '' and reservation_type == 1): driver.get(url) print("before login")
Please Dont't use it in spam messages. ''' from selenium.webdriver.firefox.options import Options from selenium import webdriver import psutil,os from PIL import Image from time import strftime,sleep print("Please Wait Starting whatsapp-bomber") os.system('notify-send "-i" call-start "Whatsapp bomber Start" "Developed By RIZWAN AHMAD([email protected])"') options = Options() options.headless = True driver = webdriver.Firefox(options=options) driver.get("http://web.whatsapp.com") def close(): for proc in psutil.process_iter(): if proc.name() == "display": proc.kill() print("QR Code Generating") sleep(2)