def initialize_browser(for_scenario_2=False):
    """Create and return a configured Firefox WebDriver.

    When *for_scenario_2* is true, a custom download profile is attached so
    that Firefox saves files a page offers for download without prompting
    (Selenium cannot drive the native download dialog).
    """
    driver_args = []
    driver_kwargs = {}

    if for_scenario_2:
        profile = webdriver.FirefoxProfile()
        # folderList: 0 = desktop, 1 = Downloads folder, 2 = the directory
        # given in browser.download.dir (what we want here).
        profile.set_preference('browser.download.folderList', 2)
        profile.set_preference('browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', settings.BROWSER_DOWNLOAD_FOLDER)
        auto_saved_mime_types = ['text/plain', 'application/json']
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               ';'.join(auto_saved_mime_types))
        driver_args.append(profile)

    if settings.USE_HEADLESS_BROWSER:
        from selenium.webdriver.firefox.options import Options
        options = Options()
        options.add_argument("--headless")
        options.log.level = "trace"
        driver_kwargs['options'] = options

    browser = webdriver.Firefox(*driver_args, **driver_kwargs)
    # browser.maximize_window()  # FIXME: when enabled, some clicks are not triggered anymore
    browser.implicitly_wait(settings.WAIT_TIME_BETWEEN_EACH_STEP)  # in seconds
    return browser
def get_driver(self, force=False):
    """Initialise (or return the cached) Selenium driver.

    :param force: when true, close any existing driver first and recreate it.
    :returns: a headless Firefox webdriver with a spoofed user agent.
    """
    if force:
        # Forced recreation: close the current driver first.
        self.close()
    if not self.driver:
        # Headless Firefox initialisation.
        # Spoof the user agent.
        profile = webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", self.user_agent)
        # Headless options.
        options = Options()
        options.add_argument("--headless")
        # Launch the browser.
        self.driver = webdriver.Firefox(profile, firefox_options=options)
        # PhantomJS initialisation (kept for reference, currently unused).
        # Spoof the user agent for PhantomJS.
        ua = dict(DesiredCapabilities.PHANTOMJS)
        ua['phantomjs.page.settings.userAgent'] = (self.user_agent)
        # Initialise.
        #self.driver = webdriver.PhantomJS(desired_capabilities=ua)
        # HTTP header setup.
        # Implicit wait setup.
        self.driver.implicitly_wait(15)  # seconds
    return self.driver
def _setup_firefox(self, capabilities):
    """Setup Firefox webdriver

    :param capabilities: capabilities object
    :returns: a new local Firefox driver
    """
    if capabilities.get("marionette"):
        gecko_driver = self.config.get('Driver', 'gecko_driver_path')
        self.logger.debug("Gecko driver path given in properties: %s", gecko_driver)
    else:
        # Legacy (non-marionette) Firefox does not need a geckodriver executable.
        gecko_driver = None

    # Get Firefox binary
    firefox_binary = self.config.get_optional('Firefox', 'binary')

    firefox_options = Options()

    if self.config.getboolean_optional('Driver', 'headless'):
        self.logger.debug("Running Firefox in headless mode")
        firefox_options.add_argument('-headless')

    self._add_firefox_arguments(firefox_options)

    if firefox_binary:
        firefox_options.binary = firefox_binary

    log_path = os.path.join(DriverWrappersPool.output_directory, 'geckodriver.log')
    try:
        # Selenium 3
        return webdriver.Firefox(firefox_profile=self._create_firefox_profile(),
                                 capabilities=capabilities, executable_path=gecko_driver,
                                 firefox_options=firefox_options, log_path=log_path)
    except TypeError:
        # Selenium 2 (its Firefox constructor has no log_path keyword)
        return webdriver.Firefox(firefox_profile=self._create_firefox_profile(),
                                 capabilities=capabilities, executable_path=gecko_driver,
                                 firefox_options=firefox_options)
def start_driver(self, browser_type, capabilities, config_section=None):
    """Prepare a Selenium webdriver for the requested browser.

    :param browser_type: browser to start ("firefox", "chrome", "ie",
        "phantomjs" or "opera")
    :param capabilities: capabilities used for webdriver initialization
    :param config_section: optional configuration section with browser arguments
    :returns: the started webdriver instance
    :raises ValueError: when *browser_type* is not recognised
    """
    # Resolve the profile/options object for this browser type.
    browser_profile = self.get_browser_profile(browser_type, capabilities, config_section)

    # Start the matching local browser; each branch returns directly.
    if browser_type == "firefox":
        from selenium.webdriver.firefox.options import Options
        firefox_options = Options()
        for argument in self.get_browser_arguments(config_section):
            firefox_options.add_argument(argument)
        return webdriver.Firefox(browser_profile, desired_capabilities=capabilities,
                                 firefox_options=firefox_options)
    if browser_type == "chrome":
        return webdriver.Chrome(desired_capabilities=capabilities,
                                chrome_options=browser_profile)
    if browser_type == "ie":
        return webdriver.Ie(capabilities=capabilities)
    if browser_type == "phantomjs":
        return webdriver.PhantomJS(desired_capabilities=capabilities)
    if browser_type == "opera":
        return webdriver.Opera(desired_capabilities=capabilities)
    # SafariDriver bindings for Python not yet implemented
    # elif browser == "Safari":
    #     self.driver = webdriver.SafariDriver()
    raise ValueError('Unknown type of browser.')
def test_to_capabilities(self):
    """Options.to_capabilities() must expose profile, args, binary and
    prefs under the "moz:firefoxOptions" capability key.

    Fix: ``basestring`` is Python-2-only and raises NameError on
    Python 3; the serialised values are ``str`` there.
    """
    opts = Options()
    assert opts.to_capabilities() == {}

    profile = FirefoxProfile()
    opts.profile = profile
    caps = opts.to_capabilities()
    assert "moz:firefoxOptions" in caps
    assert "profile" in caps["moz:firefoxOptions"]
    # The profile is serialised to a base64 text string.
    assert isinstance(caps["moz:firefoxOptions"]["profile"], str)
    assert caps["moz:firefoxOptions"]["profile"] == profile.encoded

    opts.add_argument("--foo")
    caps = opts.to_capabilities()
    assert "moz:firefoxOptions" in caps
    assert "args" in caps["moz:firefoxOptions"]
    assert caps["moz:firefoxOptions"]["args"] == ["--foo"]

    binary = FirefoxBinary()
    opts.binary = binary
    caps = opts.to_capabilities()
    assert "moz:firefoxOptions" in caps
    assert "binary" in caps["moz:firefoxOptions"]
    assert isinstance(caps["moz:firefoxOptions"]["binary"], str)
    assert caps["moz:firefoxOptions"]["binary"] == binary._start_cmd

    opts.set_preference("spam", "ham")
    caps = opts.to_capabilities()
    assert "moz:firefoxOptions" in caps
    assert "prefs" in caps["moz:firefoxOptions"]
    assert isinstance(caps["moz:firefoxOptions"]["prefs"], dict)
    assert caps["moz:firefoxOptions"]["prefs"]["spam"] == "ham"
def getCDMStatusPage(tid_crm):
    """Fetch the CDM monitor status page HTML for the given terminal id."""
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    opts = Options()
    opts.add_argument("--headless")
    # Headless run: Firefox starts without a visible browser window.
    driver = webdriver.Firefox(firefox_options=opts)

    link = 'http://172.18.65.42/monitorcdm/'
    driver.get(link)
    # Log in as guest.
    driver.find_elements_by_css_selector("input[type='radio'][value='GUEST']")[0].click()
    driver.find_element_by_class_name('tbutton').click()
    driver.get(link)
    # Search for the terminal id.
    driver.get('http://172.18.65.42/monitorcdm/?_module_=search_tid')
    driver.find_element_by_name('_termid_').send_keys(tid_crm)
    driver.find_element_by_class_name('tbutton').click()
    page_html = driver.page_source
    driver.quit()
    return page_html
def open(self):
    """Open a headless Firefox driver bound to the user's Firefox profile.

    For selenium to work with Firefox and retrieve SAP Notes from
    launchpad.support.sap.com you must:

    1. Use a browser certificate (SAP Passport) to avoid renewed logons.
       Apply for it at:
       https://support.sap.com/support-programs-services/about/getting-started/passport.html
    2. Import the certificate into Firefox:
       Open menu -> Preferences -> Advanced -> View Certificates ->
       Your Certificates -> Import
    3. Trust this certificate (auto select)
    4. Verify: visit some SAP Note url in Launchpad. No credentials should
       be asked and the target page must load successfully.
    """
    utils = self.get_service('Utils')
    headless_options = Options()
    headless_options.add_argument('--headless')
    profile_dir = utils.get_firefox_profile_dir()
    firefox_profile = webdriver.FirefoxProfile(profile_dir)
    driver = None
    try:
        driver = webdriver.Firefox(firefox_profile=firefox_profile,
                                   firefox_options=headless_options)
    except Exception as error:
        # Typically: geckodriver not found. Download it from:
        # https://github.com/mozilla/geckodriver/releases/latest
        self.log.error(error)
    self.log.debug("Webdriver initialited")
    return driver
def load_driver():
    """Return a Firefox webdriver running in headless mode."""
    headless = Options()
    headless.add_argument("--headless")
    return webdriver.Firefox(firefox_options=headless)
def setUp(self):
    """Create a superuser, load the fixture row, and start the browser."""
    superuser = User.objects.create_superuser(self.username, '*****@*****.**', self.password)
    self.existing = TestModel.objects.get(pk=1)
    # Instantiating the WebDriver will load your browser.
    firefox_options = Options()
    if settings.HEADLESS_TESTING:
        firefox_options.add_argument("--headless")
    self.webdriver = CustomWebDriver(firefox_options=firefox_options)
def setUp(self):
    """Start a Sauce Chrome driver on CI, otherwise a headless local Firefox."""
    if _CI:
        self.driver = self.sauce_chrome_webdriver()
    elif settings.SELENIUM is True:
        ff_options = FirefoxOptions()
        ff_options.add_argument('-headless')
        self.driver = Firefox(firefox_options=ff_options)
    self.driver.implicitly_wait(10)
def setUp(self):
    """Start one headless Firefox driver and one headless Chrome driver."""
    # Firefox
    ff_opts = OptionsFF()
    ff_opts.add_argument('-headless')
    self.firefox_driver = webdriver.Firefox(firefox_options=ff_opts)
    # Chrome
    chrome_opts = OptionsChrom()
    chrome_opts.add_argument('-headless')
    self.chrome_driver = webdriver.Chrome(chrome_options=chrome_opts)
def test_arguments(self):
    """Arguments accumulate both via add_argument() and by mutating the
    exposed ``arguments`` list directly."""
    options = Options()
    assert len(options.arguments) == 0
    options.add_argument("--foo")
    assert len(options.arguments) == 1
    options.arguments.append("--bar")
    assert len(options.arguments) == 2
    assert options.arguments == ["--foo", "--bar"]
def test_rendering_utf8_iframe():
    """A UTF-8 body must survive the IFrame render/display round trip."""
    expected_text = u'Cerrahpaşa Tıp Fakültesi'
    iframe = elem.IFrame(html=u'<p>Cerrahpaşa Tıp Fakültesi</p>')

    headless_opts = Options()
    headless_opts.add_argument('-headless')
    driver = Firefox(options=headless_opts)
    driver.get('data:text/html,' + iframe.render())
    driver.switch_to.frame(0)
    assert expected_text in driver.page_source
def setUp(self):
    """Open redbot in headless Firefox, submit the test URI and wait for it."""
    ff_options = Options()
    ff_options.add_argument('-headless')
    self.browser = webdriver.Firefox(options=ff_options)
    self.browser.get(redbot_uri)
    uri_field = self.browser.find_element_by_id("uri")
    self.uri = uri_field
    uri_field.send_keys(self.test_uri)
    uri_field.submit()
    # Give redbot a moment before polling for completion.
    time.sleep(2.0)
    self.check_complete()
def new_instance(self):
    """Initialise a new selenium Firefox webdriver instance and return a
    reference to the browser object for further processing.

    Headless mode is enabled when ``self.headless`` is set. The window is
    sized to 1024x768 and script execution is capped at 5 seconds.
    """
    options = Options()
    if self.headless:
        # Fix: debug message previously read 'actiating headless mode'.
        print_debug(self.debug, 'activating headless mode')
        options.add_argument('-headless')
    driver = webdriver.Firefox(firefox_options=options)
    driver.set_window_size(1024, 768)
    driver.set_script_timeout(5)
    return driver
def before_all(context): print("context", context) # Determine the target path. Can either be file path or base URL. if 'TARGET' in os.environ: context.target = os.environ['TARGET'] else: print("Please specify the Phenogrid file path or base URL with 'TARGET=' format") sys.exit(1) # Check to see which browser to use, default to use Firefox if 'BROWSER' in os.environ and os.environ['BROWSER'] == 'phantomjs': context.browser = webdriver.PhantomJS() print("# Using PhantomJS") else: options = Options() options.add_argument('-headless') context.browser = Firefox(firefox_options=options) # print("# Using Firefox") # d = DesiredCapabilities.FIREFOX # d['marionette'] = True # # d['binary'] = '/Applications/Firefox.app/Contents/MacOS/firefox-bin' # d['loggingPrefs'] = {'browser': 'ALL', 'client': 'ALL', 'driver': 'ALL', 'performance': 'ALL', 'server': 'ALL'} # fp = webdriver.FirefoxProfile() # fp.set_preference('devtools.jsonview.enabled', False) # fp.set_preference('javascript.options.showInConsole', True) # fp.set_preference('browser.dom.window.dump.enabled', True) # fp.set_preference('devtools.chrome.enabled', True) # fp.set_preference("devtools.webconsole.persistlog", True) # fp.set_preference("devtools.browserconsole.filter.jslog", True) # fp.set_preference("devtools.browserconsole.filter.jswarn", True) # fp.set_preference("devtools.browserconsole.filter.error", True) # fp.set_preference("devtools.browserconsole.filter.warn", True) # fp.set_preference("devtools.browserconsole.filter.info", True) # fp.set_preference("devtools.browserconsole.filter.log", True) # fp.set_preference("devtools.webconsole.filter.jslog", True) # fp.set_preference("devtools.webconsole.filter.jswarn", True) # fp.set_preference("devtools.webconsole.filter.error", True) # fp.set_preference("devtools.webconsole.filter.warn", True) # fp.set_preference("devtools.webconsole.filter.info", True) # fp.set_preference("devtools.webconsole.filter.log", True) # context.browser = 
webdriver.Firefox(capabilities=d, firefox_profile=fp, executable_path='/usr/local/bin/geckodriver') # context.browser._is_remote = False # Set a 30 second implicit wait - http://selenium-python.readthedocs.org/en/latest/waits.html#implicit-waits # Once set, the implicit wait is set for the life of the WebDriver object instance. context.browser.set_window_size(1440, 900) context.browser.implicitly_wait(30) # seconds
def reset_browser(self):
    """Ensure a fresh browser session, creating the Firefox driver on first use.

    On subsequent calls the current window is closed, a brand new session
    is started against the shared profile/capabilities, and all cookies
    are deleted.
    """
    # Fix: compare against None with identity, not equality.
    if globals.browser is None:
        options = Options()
        options.add_argument(self.__get_arg())
        globals.browser = Firefox(AbstractBrowserBasedTest._firefox_profile,
                                  firefox_options=options,
                                  log_path=naming.GECKODRIVER_LOG_FILE_PATH)
        globals.browser.set_page_load_timeout(self.DEFAULT_TIMEOUT)
    else:
        globals.browser.close()
        globals.browser.start_session(capabilities=AbstractBrowserBasedTest._firefox_capabilities,
                                      browser_profile=AbstractBrowserBasedTest._firefox_profile)
        globals.browser.delete_all_cookies()  # Belt and Braces.
def setUp(self):
    """Create empty and populated fixture rows plus a superuser, then
    start the test browser."""
    self.single_empty = TestModelSingle()
    self.single_empty.save()
    self.single = TestModelSingle(selection='octopus')
    self.single.save()
    self.advanced_empty = TestModelAdvanced()
    self.advanced_empty.save()
    self.advanced = TestModelAdvanced(set='set1')
    self.advanced.save()
    self.superuser = create_superuser()
    # Instantiating the WebDriver will load your browser.
    ff_options = Options()
    if settings.HEADLESS_TESTING:
        ff_options.add_argument("--headless")
    self.webdriver = CustomWebDriver(firefox_options=ff_options)
def setup_package():
    """Set up the Selenium driver once for all tests."""
    # Just skipping *setup_package* and *teardown_package* generates an
    # uncaught exception under Python 2.6, hence this guard.
    if not tests_are_run:
        return
    if not SHOW_BROWSER:
        # Perform all graphical operations in memory.
        vdisplay = SeleniumTestCase.vdisplay = Xvfb(width=1280, height=720)
        vdisplay.start()
    # Create a Selenium browser instance.
    headless_opts = Options()
    headless_opts.add_argument('-headless')
    selenium = SeleniumTestCase.selenium = Firefox(firefox_options=headless_opts)
    selenium.maximize_window()
    SeleniumTestCase.wait = ui.WebDriverWait(selenium, 10)
    SeleniumTestCase.selenium.implicitly_wait(3)
def create_browser(request, driver_wait_time, tries=0):
    """Create a headless Firefox driver, retrying on startup failures.

    This sometimes fails to start firefox on CI, so we retry up to
    ``max_tries`` additional times before giving up.

    :param request: pytest request; the driver is attached to ``request.node``
    :param driver_wait_time: implicit wait (seconds) applied to the driver
    :param tries: attempts already made (kept for backward compatibility)
    :returns: the configured webdriver
    """
    max_tries = 5
    options = Options()
    options.add_argument('-headless')
    # Iterative retry instead of recursion: same attempt count, no stack
    # growth, and a bare `raise` keeps the original traceback.
    while True:
        try:
            driver = webdriver.Firefox(firefox_options=options)
            driver.implicitly_wait(driver_wait_time)
            driver.set_window_size(1200, 1200)
            request.node._driver = driver
            return driver
        except Exception:
            if tries >= max_tries:
                raise
            tries += 1
def browserEngine(response):
    """Render *response* in headless Firefox and report whether a popup
    (JS alert) fires while interacting with the page.

    Script sources and hrefs are neutralised first so the page cannot
    load external code or navigate away.

    :param response: raw HTML text to test
    :returns: True when an alert interrupted the interaction, else False
    """
    options = Options()
    options.add_argument('--headless')
    browser = webdriver.Firefox(options=options)
    # BUG FIX: re.I was previously passed as the positional ``count``
    # argument of re.sub (so only 2 replacements were made and the match
    # was case-sensitive); it must be passed as ``flags``.
    response = re.sub(r'<script.*?src=.*?>', '<script src=#>', response, flags=re.I)
    response = re.sub(r'href=.*?>', 'href=#>', response, flags=re.I)
    writer(response, 'test.html')
    browser.get('file://' + sys.path[0] + '/test.html')
    os.remove('test.html')
    popUp = False
    actions = webdriver.ActionChains(browser)
    try:
        actions.move_by_offset(2, 2)
        actions.perform()
        browser.close()
    except UnexpectedAlertPresentException:
        popUp = True
    browser.quit()
    return popUp
def firefox_options(request, firefox_path, firefox_profile):
    """Build a Firefox Options object from the fixtures and pytest markers."""
    options = Options()

    if firefox_profile is not None:
        options.profile = firefox_profile
    if firefox_path is not None:
        options.binary = FirefoxBinary(firefox_path)

    argument_marker = request.node.get_marker('firefox_arguments')
    if argument_marker is not None:
        for argument in argument_marker.args:
            options.add_argument(argument)

    preference_marker = request.node.get_marker('firefox_preferences')
    if preference_marker is not None:
        # The marker carries a single dict of preference name -> value.
        for pref_name, pref_value in preference_marker.args[0].items():
            options.set_preference(pref_name, pref_value)

    return options
def _capture(self):
    """Save snapshot image of webpage, and set captured datetime."""
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    ff_options = Options()
    ff_options.add_argument('--headless')
    driver = webdriver.Firefox(options=ff_options)
    # driver.set_page_load_timeout(10)  # TODO
    resolution = self._get_capture_resolution()
    driver.set_window_size(*resolution)
    driver.get(self.url)
    # Grow the window to the full document height so nothing is cut off.
    full_height = driver.execute_script('return document.body.scrollHeight;')
    driver.set_window_size(resolution[0], full_height)  # TODO
    self.captured_at = timezone.now()
    screenshot_png = driver.get_screenshot_as_png()
    driver.quit()
    self.image.save(self._generate_image_filename(), ContentFile(screenshot_png))
    return True
def _get_Firefox(self):
    """Create the Firefox webdriver and store it on ``self.webdriver``.

    Applies the configured binary/geckodriver paths, optional headless
    mode, and the user-set proxy (socks4/5 or http) from ``self.proxy``.

    :returns: True on success, False when the driver could not be started.
    """
    try:
        bin_path = self.config.get('firefox_binary_path')
        binary = FirefoxBinary(bin_path)
        geckodriver_path = self.config.get('geckodriver_path')
        options = FirefoxOptions()
        profile = webdriver.FirefoxProfile()
        # NOTE(review): 'user-agent=...' is a Chrome-style switch; Firefox
        # likely ignores it — a profile preference would be needed. Confirm.
        options.add_argument(
            'user-agent={}'.format(self.user_agent))
        if self.browser_mode == 'headless':
            options.set_headless(headless=True)
            #options.add_argument('window-size=1200x600')  # optional
        if self.proxy:
            # this means that the proxy is user set, regardless of the type
            profile.set_preference("network.proxy.type", 1)
            if self.proxy.proto.lower().startswith('socks'):
                profile.set_preference("network.proxy.socks", self.proxy.host)
                profile.set_preference("network.proxy.socks_port", self.proxy.port)
                # socks version derived from the last char of the proto string.
                profile.set_preference("network.proxy.socks_version",
                                       5 if self.proxy.proto[-1] == '5' else 4)
                profile.update_preferences()
            elif self.proxy.proto == 'http':
                profile.set_preference("network.proxy.http", self.proxy.host)
                profile.set_preference("network.proxy.http_port", self.proxy.port)
            else:
                raise ValueError('Invalid protocol given in proxyfile.')
            # NOTE(review): update_preferences() runs a second time for the
            # socks branch here; harmless but redundant.
            profile.update_preferences()
        self.webdriver = webdriver.Firefox(firefox_binary=binary, firefox_options=options,
                                           executable_path=geckodriver_path,
                                           firefox_profile=profile)
        return True
    except WebDriverException as e:
        # reaching here is bad, since we have no available webdriver instance.
        logger.error(e)
        return False
def main():
    """Parse CLI arguments and run the OmegaUp test in the chosen browser."""
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--url', help='the entry point URL',
                        default='http://localhost:8080/')
    parser.add_argument('--browser', help='the browser to use: chrome|firefox',
                        default='chrome')
    args = parser.parse_args()
    print(args)

    if args.browser == 'chrome':
        driver = webdriver.Chrome()
    elif args.browser == 'firefox':
        options = Options()
        options.add_argument('-headless')
        driver = webdriver.Firefox(firefox_options=options)
    else:
        # BUG FIX: raising a plain string is a TypeError in Python 3;
        # raise a proper exception instead.
        raise ValueError('Must specify which browser to use')

    try:
        test_case = OmegaUpTest(driver, args.url)
        test_case.run()
    finally:
        driver.quit()
def set_selenium_local_session(
    proxy_address,
    proxy_port,
    proxy_username,
    proxy_password,
    headless_browser,
    browser_profile_path,
    disable_image_load,
    page_delay,
    geckodriver_path,
    logger,
):
    """Starts local session for a selenium server. Default case scenario.

    :param proxy_address: proxy host (used together with *proxy_port*)
    :param proxy_port: proxy port
    :param proxy_username: proxy user; triggers popup-alert authentication
    :param proxy_password: proxy password
    :param headless_browser: when true, run Firefox with -headless
    :param browser_profile_path: optional path to an existing Firefox profile
    :param disable_image_load: when true, block image loading
    :param page_delay: implicit wait (seconds) applied to the browser
    :param geckodriver_path: optional explicit geckodriver path
    :param logger: logger used for status messages
    :returns: tuple of (browser, error message string)
    """
    browser = None
    err_msg = ""

    # set Firefox Agent to mobile agent
    user_agent = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 "
        "(KHTML, like Gecko) FxiOS/18.1 Mobile/16B92 Safari/605.1.15"
    )
    # keep user_agent
    Settings.user_agent = user_agent

    firefox_options = Firefox_Options()
    if headless_browser:
        firefox_options.add_argument("-headless")

    if browser_profile_path is not None:
        firefox_profile = webdriver.FirefoxProfile(browser_profile_path)
    else:
        firefox_profile = webdriver.FirefoxProfile()

    # set English language
    firefox_profile.set_preference("intl.accept_languages", "en-US")
    firefox_profile.set_preference("general.useragent.override", user_agent)

    if disable_image_load:
        # permissions.default.image = 2: Disable images load,
        # this setting can improve pageload & save bandwidth
        firefox_profile.set_preference("permissions.default.image", 2)

    if proxy_address and proxy_port:
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_preference("network.proxy.http", proxy_address)
        firefox_profile.set_preference("network.proxy.http_port", proxy_port)
        firefox_profile.set_preference("network.proxy.ssl", proxy_address)
        firefox_profile.set_preference("network.proxy.ssl_port", proxy_port)

    # mute audio while watching stories
    firefox_profile.set_preference("media.volume_scale", "0.0")

    # prefer user path before downloaded one
    driver_path = geckodriver_path or get_geckodriver()
    browser = webdriver.Firefox(
        firefox_profile=firefox_profile,
        executable_path=driver_path,
        options=firefox_options,
    )

    # add extensions to hide selenium
    browser.install_addon(create_firefox_extension(), temporary=True)

    # converts to custom browser
    # browser = convert_selenium_browser(browser)

    # authenticate with popup alert window
    if proxy_username and proxy_password:
        proxy_authentication(browser, logger, proxy_username, proxy_password)

    browser.implicitly_wait(page_delay)

    # set mobile viewport (iPhone X)
    browser.set_window_size(375, 812)

    message = "Session started!"
    highlight_print("browser", message, "initialization", "info", logger)

    return browser, err_msg
def download_gisaid_EpiCoV(
        uname,    # username
        upass,    # password
        normal,   # normal mode (quiet)
        wd,       # output dir
        loc,      # location
        host,     # host
        cs,       # collection start date
        ce,       # collection end date
        ss,       # submission start date
        se,       # submission end date
        cg,       # complete genome only
        hc,       # high coverage only
        le,       # low coverage excluding
        to,       # timeout in sec
        rt,       # num of retry
        iv,       # interval in sec
        meta_dl   # also download meta
):
    """Download sequences and metadata from EpiCoV GISAID.

    Drives the GISAID EpiCoV web UI with a (optionally headless) Firefox
    session: logs in, either grabs the prepared Nextstrain bundles (when no
    date/location filter is given) or browses with the requested filters and
    downloads the matching sequences plus the acknowledgement table. When
    *meta_dl* is set, per-record detail metadata is scraped page by page and
    written to ``gisaid_detail_metadata.json`` in *wd*.

    NOTE(review): several bare ``except:`` clauses below deliberately retry or
    bail out via ``sys.exit(1)``; narrowing them would change error handling.
    """
    # output directory
    if not os.path.exists(wd):
        os.makedirs(wd, exist_ok=True)
    wd = os.path.abspath(wd)
    # GISAID_FASTA = f'{wd}/sequences.fasta.bz2'
    # GISAID_TABLE = f'{wd}/gisaid_cov2020_acknowledgement_table.xls'
    GISAID_DTL_JASON = f'{wd}/gisaid_detail_metadata.json'
    # GISAID_TSV = f'{wd}/metadata.tsv.bz2'
    metadata = []

    # MIME types that Firefox must auto-save instead of opening.
    mime_types = "application/octet-stream"
    mime_types += ",application/excel,application/vnd.ms-excel"
    mime_types += ",application/pdf,application/x-pdf"
    mime_types += ",application/x-bzip2"
    mime_types += ",application/x-gzip,application/gzip"

    # start fresh
    try:
        os.remove(GISAID_DTL_JASON)
    except OSError:
        pass

    print("Opening browser...")
    profile = webdriver.FirefoxProfile()
    # folderList=2 -> download to the directory set in browser.download.dir.
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.download.dir", wd)
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", mime_types)
    profile.set_preference("plugin.disable_full_page_plugin_for_types", mime_types)
    profile.set_preference("pdfjs.disabled", True)

    options = Options()
    if not normal:
        options.add_argument("--headless")

    driver = webdriver.Firefox(firefox_profile=profile, options=options)

    # driverwait
    driver.implicitly_wait(20)
    wait = WebDriverWait(driver, to)

    # open GISAID
    print("Opening website GISAID...")
    driver.get('https://platform.gisaid.org/epi3/frontend')
    waiting_sys_timer(wait)
    print(driver.title)
    assert 'GISAID' in driver.title

    # login
    print("Logining to GISAID...")
    username = driver.find_element_by_name('login')
    username.send_keys(uname)
    password = driver.find_element_by_name('password')
    password.send_keys(upass)
    driver.execute_script("return doLogin();")
    waiting_sys_timer(wait)

    # navigate to EpiFlu
    print("Navigating to EpiCoV...")
    epicov_tab = driver.find_element_by_xpath("//div[@id='main_nav']//li[3]/a")
    epicov_tab.click()
    waiting_sys_timer(wait)

    # when user doesn't enter time/location, download nextstrain sequences and metadata
    if not (cs or ce or ss or se or loc):
        # download from downloads section
        print("Clicking downloads...")
        pd_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//div[@class='sys-actionbar-bar']//div[3]")))
        pd_button.click()
        waiting_sys_timer(wait)

        # have to click the first row twice to start the iframe
        iframe = waiting_for_iframe(wait, driver, rt, iv)
        driver.switch_to.frame(iframe)
        waiting_sys_timer(wait)

        print("Downloading Nextstrain sequences...")
        dl_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//div[contains(text(), "nextfasta")]')))
        dl_button.click()
        waiting_sys_timer(wait)
        fn = wait_downloaded_filename(wait, driver, 3600)
        print(f"Downloaded to {fn}. ")
        waiting_sys_timer(wait)

        print("Downloading Nextstrain metadata...")
        dl_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//div[contains(text(), "nextmeta")]')))
        dl_button.click()
        fn = wait_downloaded_filename(wait, driver, 1800)
        print(f"Downloaded to {fn}. ")
        waiting_sys_timer(wait)

        # go back to main frame
        back_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//button[contains(text(), "Back")]')))
        back_button.click()
        driver.switch_to.default_content()
        waiting_sys_timer(wait)

    # have to reduce the range of genomes
    if cs or ce or ss or se or loc:
        print("Browsing EpiCoV...")
        browse_tab = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, '//*[contains(text(), "Browse")]')))
        browse_tab.click()
        waiting_sys_timer(wait)
        waiting_table_to_get_ready(wait)

        # set location
        if loc:
            print("Setting location...")
            loc_input = driver.find_element_by_xpath(
                "//td/div[contains(text(), 'Location')]/../following-sibling::td/div/div/input"
            )
            loc_input.send_keys(loc)
            waiting_sys_timer(wait, 7)

        # set host
        if host:
            print("Setting host...")
            host_input = driver.find_element_by_xpath(
                "//td/div[contains(text(), 'Host')]/../following-sibling::td/div/div/input"
            )
            host_input.send_keys(host)
            waiting_sys_timer(wait, 7)

        # set dates; the four date inputs appear in the same order as `dates`.
        date_inputs = driver.find_elements_by_css_selector(
            "div.sys-form-fi-date input")
        dates = (cs, ce, ss, se)
        for dinput, date in zip(date_inputs, dates):
            if date:
                print("Setting date...")
                dinput.send_keys(date)

        # Close any date picker left open before proceeding.
        ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        waiting_sys_timer(wait, 7)

        # complete genome only
        if cg:
            print("complete genome only...")
            checkbox = driver.find_element_by_xpath(
                '//input[@value="complete"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # high coverage only
        if hc:
            print("high coverage only...")
            checkbox = driver.find_element_by_xpath('//input[@value="highq"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # excluding low coverage
        if le:
            print("low coverage excluding...")
            checkbox = driver.find_element_by_xpath('//input[@value="lowco"]')
            checkbox.click()
            waiting_sys_timer(wait)

        # check if any genomes pass filters
        warning_message = None
        try:
            warning_message = driver.find_element_by_xpath(
                "//div[contains(text(), 'No data found.')]")
        except:
            pass
        if warning_message:
            print("No data found.")
            sys.exit(1)

        # select all genomes
        print("Selecting all genomes...")
        button_sa = driver.find_element_by_css_selector(
            "span.yui-dt-label input")
        button_sa.click()
        waiting_sys_timer(wait)

        # downloading sequence
        retry = 0
        while retry <= rt:
            try:
                print("Downloading sequences for selected genomes...")
                button = driver.find_element_by_xpath(
                    "//td[@class='sys-datatable-info']/button[contains(text(), 'Download')]"
                )
                button.click()
                waiting_sys_timer(wait)

                # switch to iframe
                iframe = waiting_for_iframe(wait, driver, rt, iv)
                driver.switch_to.frame(iframe)
                waiting_sys_timer(wait)

                button = driver.find_element_by_xpath(
                    "//button[contains(text(), 'Download')]")
                button.click()
                waiting_sys_timer(wait)
                driver.switch_to.default_content()

                fn = wait_downloaded_filename(wait, driver, 1800)
                print(f"Downloaded to {fn}.")
                break
            except:
                print(f"retrying...#{retry} in {iv} sec(s)")
                if retry == rt:
                    print("Unexpected error:", sys.exc_info())
                    sys.exit(1)
                else:
                    time.sleep(iv)
                    retry += 1

        # downloading metadata
        retry = 0
        while retry <= rt:
            try:
                print(
                    "Downloading acknowledgement table for selected genomes..."
                )
                button = driver.find_element_by_xpath(
                    "//td[@class='sys-datatable-info']/button[contains(text(), 'Download')]"
                )
                button.click()
                waiting_sys_timer(wait)

                # switch to iframe
                iframe = waiting_for_iframe(wait, driver, rt, iv)
                driver.switch_to.frame(iframe)
                waiting_sys_timer(wait)

                label = driver.find_element_by_xpath(
                    "//label[contains(text(), 'Acknowledgement Table')]")
                label.click()

                button = driver.find_element_by_xpath(
                    "//button[contains(text(), 'Download')]")
                button.click()
                waiting_sys_timer(wait)
                driver.switch_to.default_content()

                fn = wait_downloaded_filename(wait, driver, 180)
                print(f"Downloaded to {fn}.")
                break
            except:
                print(f"retrying...#{retry} in {iv} sec(s)")
                if retry == rt:
                    print("Unexpected error:", sys.exc_info())
                    sys.exit(1)
                else:
                    time.sleep(iv)
                    retry += 1

    # iterate each pages
    if meta_dl:
        page_num = 1
        print("Retrieving metadata...")
        while True:
            print(f"Starting processing page# {page_num}...")
            # retrieve tables
            tbody = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//tbody[@class='yui-dt-data']")))
            waiting_table_to_get_ready(wait)
            # iterate each row
            for tr in tbody.find_elements_by_tag_name("tr"):
                td = tr.find_element_by_tag_name("td")
                driver.execute_script("arguments[0].scrollIntoView();", td)
                # have to click the first row twice to start the iframe
                iframe = None
                record_elem = None
                retry = 1
                while retry <= rt:
                    try:
                        td.click()
                        waiting_sys_timer(wait)
                        iframe = driver.find_element_by_xpath("//iframe")
                        if iframe:
                            break
                        else:
                            raise
                    except:
                        print(f"retrying...#{retry} in {iv} sec(s)")
                        if retry == rt:
                            print("Failed")
                            sys.exit(1)
                        else:
                            time.sleep(iv)
                            retry += 1

                driver.switch_to.frame(iframe)

                # detect error: "An internal server error occurred."
                # and "error-token: DYX47"
                error_token = driver.find_element_by_xpath("//b")
                if error_token:
                    error_token_text = error_token.text
                    if "error-token" in error_token.text:
                        print(
                            "[FATAL ERROR] A website internal server error occurred."
                        )
                        print(error_token_text)
                        sys.exit(1)

                # get the element of table with metadata
                record_elem = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//div[@class='packer']")))

                # parse metadata
                m = getMetadata(record_elem)
                metadata.append(m)
                print(f"{m['Accession ID']}\t{m['Virus name']}")

                # get back
                ActionChains(driver).send_keys(Keys.ESCAPE).perform()
                time.sleep(1)
                driver.switch_to.default_content()

            print(f"Compeleted page# {page_num}.")
            page_num += 1

            # go to the next page
            retry = 1
            button_next_page = None
            try:
                button_next_page = driver.find_element_by_xpath(
                    f'//a[@page="{page_num}"]')
            except:
                break

            if button_next_page:
                print(f"Entering page# {page_num}...")
                while retry <= rt:
                    try:
                        button_next_page.click()
                        time.sleep(10)
                        current_page = driver.find_element_by_xpath(
                            '//span[@class="yui-pg-current-page yui-pg-page"]'
                        ).text
                        if current_page != str(page_num):
                            raise
                        else:
                            break
                    except:
                        print(f"retrying...#{retry} in {iv} sec(s)")
                        if retry == rt:
                            print("Failed")
                            sys.exit(1)
                        else:
                            time.sleep(iv)
                            retry += 1

        # writing metadata to JSON file
        print("Writing detail metadata...")
        with open(GISAID_DTL_JASON, 'w') as outfile:
            json.dump(metadata, outfile)

    # close driver
    driver.quit()
def set_selenium_local_session(
    proxy_address,
    proxy_port,
    proxy_username,
    proxy_password,
    headless_browser,
    browser_profile_path,
    disable_image_load,
    page_delay,
    geckodriver_path,
    browser_executable_path,
    logfolder,
    logger,
    geckodriver_log_level,
):
    """Starts local session for a selenium server. Default case scenario.

    Builds a Firefox profile tuned for automation (language, user agent,
    optional proxy, muted audio, webdriver-detection workarounds), launches
    geckodriver, installs the project's hiding extension, and sizes the
    window to an iPhone-like viewport.

    Returns:
        tuple: (browser, err_msg) — the live webdriver and an empty error
        string on success, or (browser, message) when resizing fails and the
        browser has already been closed.
    """
    browser = None
    err_msg = ""

    firefox_options = Firefox_Options()
    if headless_browser:
        firefox_options.add_argument("-headless")

    # Reuse an existing profile directory when one is supplied.
    if browser_profile_path is not None:
        firefox_profile = webdriver.FirefoxProfile(browser_profile_path)
    else:
        firefox_profile = webdriver.FirefoxProfile()

    if browser_executable_path is not None:
        firefox_options.binary = browser_executable_path

    # set "info" by default
    # set "trace" for debugging, Development only
    firefox_options.log.level = geckodriver_log_level

    # set English language
    firefox_profile.set_preference("intl.accept_languages", "en-US")
    firefox_profile.set_preference("general.useragent.override", Settings.user_agent)

    if disable_image_load:
        # permissions.default.image = 2: Disable images load,
        # this setting can improve pageload & save bandwidth
        firefox_profile.set_preference("permissions.default.image", 2)

    # Route HTTP and SSL traffic through the given proxy (type 1 = manual).
    if proxy_address and proxy_port:
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_preference("network.proxy.http", proxy_address)
        firefox_profile.set_preference("network.proxy.http_port", int(proxy_port))
        firefox_profile.set_preference("network.proxy.ssl", proxy_address)
        firefox_profile.set_preference("network.proxy.ssl_port", int(proxy_port))

    # mute audio while watching stories
    firefox_profile.set_preference("media.volume_scale", "0.0")

    # prevent Hide Selenium Extension: error
    firefox_profile.set_preference("dom.webdriver.enabled", False)
    firefox_profile.set_preference("useAutomationExtension", False)
    firefox_profile.set_preference("general.platform.override", "iPhone")
    firefox_profile.update_preferences()

    # geckodriver log in specific user logfolder
    # NOTE(review): plain string concat — logfolder is presumably slash-terminated.
    geckodriver_log = "{}geckodriver.log".format(logfolder)

    # prefer user path before downloaded one
    driver_path = geckodriver_path or get_geckodriver()
    browser = webdriver.Firefox(
        firefox_profile=firefox_profile,
        executable_path=driver_path,
        log_path=geckodriver_log,
        options=firefox_options,
    )

    # add extensions to hide selenium
    browser.install_addon(create_firefox_extension(), temporary=True)

    # converts to custom browser
    # browser = convert_selenium_browser(browser)

    # authenticate with popup alert window
    if proxy_username and proxy_password:
        proxy_authentication(browser, logger, proxy_username, proxy_password)

    browser.implicitly_wait(page_delay)

    # Apple iPhone X: 375, 812
    # Apple iPhone XS Max: 414, 896
    try:
        browser.set_window_size(414, 896)
    except UnexpectedAlertPresentException as exc:
        logger.exception(
            "Unexpected alert on resizing web browser!\n\t"
            "{}".format(str(exc).encode("utf-8"))
        )
        close_browser(browser, False, logger)
        return browser, "Unexpected alert on browser resize"

    message = "Session started!"
    highlight_print("browser", message, "initialization", "info", logger)

    return browser, err_msg
def search():
    """Scrape Google's "People also ask" boxes for pending hotwords.

    Reads up to 1000 hotword documents with question_state == 0 from Mongo,
    searches Google for each hotword via a headless Chrome session, extracts
    related questions/answers from the result page, upserts them into the
    ``question`` collection, and marks the hotword as processed.

    NOTE(review): Google account credentials are hardcoded below — move them
    to configuration/secrets management.
    """
    client = MongoDB(environment=environment, db_name=db_name).client
    zds = client.dailypops.hotword.find({"question_state": 0}).limit(1000)
    # driver = webdriver.Firefox(options=options)
    # driver = webdriver.Firefox()
    path = r'C:\Users\EDZ\Documents\WeChat Files\wodexinwolai\FileStorage\File\2019-05/chromedriver'
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
    begin_url = "https://www.google.com/"
    driver.get(begin_url)
    # Sign in to Google first (element ids are Google's login form ids).
    driver.find_element_by_id("gb_70").click()
    driver.find_element_by_id("identifierId").send_keys(
        "*****@*****.**")
    driver.find_element_by_id("identifierNext").click()
    time.sleep(3)
    driver.find_element_by_xpath("//input[@name='password']").send_keys(
        "jiexin88")
    driver.find_element_by_id("passwordNext").click()
    time.sleep(5)
    for k in zds:
        # url = 'https://www.google.com/search?sxsrf=ACYBGNRgCAf2dRIVd6dwrtD4B82G2GPK7A%3A1569392173168&ei=LQaLXe35CcmmmAX5k50o&q=trump&oq={}&gs_l=psy-ab.3..35i39l2j0i131j0i3j0i131j0j0i3j0i131l2j0.4634.7621..8636...1.2..3.398.1517.0j7j1j1......0....1..gws-wiz.....10..0i71j0i67j0i131i67j35i362i39j0i131i273j0i273.jn_vf2Z0qbo&ved=0ahUKEwitxPq3qevkAhVJE6YKHflJBwUQ4dUDCAs&uact=5'
        # Search-URL template; {} is filled with the '+'-joined hotword terms.
        url = "https://www.google.com/search?biw=1536&bih=890&ei=ZomJXceaOtCbmAWyi7egCg&q={}&oq=commp&gs_l=psy-ab.3.1.0i67l3j0i10l7.8182.9766..12758...0.0..0.135.565.0j5......0....1..gws-wiz.......0.KfvdJE90Egw"
        print('参数', k)
        hotword = k.get("hotword", "")
        hotword_id = k.get("hotword_id", "")
        event_id = k.get("event_id", "")
        hotword = hotword.split(" ")
        print('hotword_list', hotword)
        parms = '+'.join(hotword)
        # parms = 'competition'
        print('parms', parms)
        url = url.format(parms)
        # url_ = url.format(parms)
        print('url_', url)
        driver.get(url)
        # driver.find_element_by_class_name("related-question-pair").click()
        response = driver.page_source
        # print(response)
        html = etree.HTML(response)
        # Question titles come from lxml; answer blocks from a scrapy Selector.
        titles = html.xpath(
            '//div[@class="related-question-pair"]//div[@class="match-mod-horizontal-padding hide-focus-ring cbphWd"]//text()'
        )
        print('titles', titles)
        contents = Selector(text=response).xpath(
            '//div[@class="related-question-pair"]//div[@class="gy6Qzb kno-ahide"]'
        ).extract()
        for title, content in zip(titles, contents):
            con = Selector(text=content).xpath(
                '//div[contains(@class,"mod")]//text()').extract()
            con = ' '.join(con)
            items = {}
            # question_id is derived from title + hotword_id so upserts are stable.
            items['question_id'] = md5_(title + hotword_id)
            items['event_id'] = event_id
            items['hotword_id'] = hotword_id
            items['question'] = title
            items['answer'] = con
            items['source'] = ''
            items['release_time'] = '2019-09-25'
            items['time_stamp'] = int(time.time())
            items['entity'] = []
            items['label'] = []
            items['static_page'] = 0
            items['nlp_state'] = 0
            print(items)
            # Upsert (third arg True) keyed on question_id.
            client.dailypops.question.update(
                {'question_id': items['question_id']}, items, True)
        # Mark this hotword as processed.
        s1 = {'hotword_id': hotword_id}
        s2 = {'$set': {'question_state': 1}}
        client.dailypops.hotword.update(s1, s2)
        time.sleep(3)
import selenium from selenium import webdriver from selenium.webdriver.firefox.options import Options import time browser_options = Options() browser_options.headless = True browser_options.add_argument('--no-sandbox') browser_options.add_argument('--disable-dev-shm-usage') def searchLine(myline, url="http://cti.voa.gov.uk/cti/inits.asp", browser_options=browser_options): myline = myline.replace('"', '').split(',') if len(myline) != 16: print('[FATAL] Wrong format of input data, cannot perform research.') return 'fatalErr' postcode = myline[3] address = ' '.join(myline[7:10]) try: driver = webdriver.Firefox( options=browser_options, firefox_binary="/kaggle/working/firefox/firefox/firefox") driver.get(url) txtPC = driver.find_element_by_name("txtPostCode") driver.execute_script('arguments[0].value = arguments[1]', txtPC, postcode) driver.find_element_by_id('frmInitSForm').submit() time.sleep(1.5) scl_complex = driver.find_element_by_class_name('scl_complex')
from selenium import webdriver from selenium.webdriver.firefox.options import Options from selenium.webdriver.common.keys import Keys import requests, json firefox_options = Options() firefox_options.add_argument("--headless") headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36", 'content-type': 'application/json' } driver = webdriver.Firefox(firefox_options=firefox_options) driver.get('https://www.baidu.com') elem = driver.find_element_by_name("wd") # 找到输入框的元素 elem.clear() # 清空输入框里的内容 elem.send_keys(u"天气深圳") # 在输入框中输入'Kali Linux' elem.send_keys(Keys.RETURN) # 在输入框中输入回车键 driver.implicitly_wait(10) # 隐式等待 tqtoday = driver.find_element_by_css_selector('.op_weather4_twoicon_today') tqelemtitle = driver.find_element_by_css_selector('.c-gap-bottom-small a').text time = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_date').text wd = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_temp').text weath = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_weath').text wind = tqtoday.find_element_by_css_selector('.op_weather4_twoicon_wind').text data = { "msgtype": "text", "text": { "content": tqelemtitle + '\n' + time + '\n' + wd + '\n' + weath + '\n' + wind
# coding: utf-8 import codecs #import cookielib import datetime import os import re import requests import urllib from bs4 import BeautifulSoup from selenium import webdriver from selenium.webdriver.firefox.options import Options options = Options() options.add_argument("--headless") LOG_DIR = os.path.dirname(os.path.abspath(__file__)) from threading import Thread, Lock try: from Queue import Queue, Empty unicode = unicode from urllib import urlencode except: from queue import Queue, Empty unicode = str from urllib.parse import urlencode def worker_get_bookinfo_detail(function, urls, result, mutex, session): while True: try: if mutex.acquire(): item = urls.get(block=False)
def main():
    """Entry point: log in, resolve the target profile, then harvest users
    who commented on or reacted to the target's photos and stories.

    Relies on module-level helpers (parse_args, do_login, get_all_photos,
    parse_commenters, store_pivots, ...) and writes the collected user
    pivots to disk before shutting the driver down.
    """
    print(BANNER)
    args = parse_args()

    options = Options()
    if args.headless:
        options.add_argument("--headless")
    driver = webdriver.Firefox(executable_path=args.driver_path, options=options)

    do_login(driver, args.user, args.password)
    check_login(driver)

    # A purely numeric target is treated as a user id, anything else as a username.
    if args.target.isdigit():
        target_id = args.target
        target_username = get_username(driver, target_id)
    else:
        target_id = get_user_id(driver, args.target)
        target_username = args.target
    print('[*] Selected target: %s (%s)' % (target_username, target_id))

    commenters = []
    reactions = []
    # BUGFIX: initialize before the try block — a Ctrl+C arriving before the
    # first loop iteration completed used to raise NameError on len(users).
    users = []

    print('[*] Getting photos links... ', end=" ")
    photos = get_all_photos(driver, target_username,
                            args.limit_photos)[:args.limit_photos]
    print('%d photos found' % len(photos))

    print('[*] Getting stories links... ', end=" ")
    stories = get_all_stories(driver, target_id,
                              args.limit_stories)[:args.limit_stories]
    print('%d stories found' % len(stories))

    print(
        '[*] Retreiving users who have interacted... press Ctrl+C when you have enough'
    )
    msg = ''
    try:
        for url in photos + stories:
            commenters += parse_commenters(driver.page_source)
            if len(commenters) < args.limit_comments:
                commenters += get_all_comments(driver, url,
                                               limit=args.limit_comments)
            if len(reactions) < args.limit_reactions:
                reactions += get_all_reactions(driver, url,
                                               limit=args.limit_reactions)
            users = list(set(reactions).union(set(commenters)))
            # '\r' * len(msg) overwrites the previous progress line in place.
            msg = '%sUnique users: %d Comments: %d Reactions: %d' % (
                '\r' * len(msg), len(users), len(commenters), len(reactions))
            print(msg, end='\r')
    except (KeyboardInterrupt, SystemExit):
        print('[!] KeyboardInterrupt received. %d users retrieved' % len(users))

    # Trim to the requested limits and recompute the unique-user set.
    reactions = reactions[:args.limit_reactions]
    commenters = commenters[:args.limit_comments]
    users = list(set(reactions).union(set(commenters)))

    print_statistics(commenters, reactions)
    users = fill_user_ids(driver, users)

    if args.output:
        store_pivots(users, args.output)
    else:
        store_pivots(users, '%s-pivots.txt' % target_id)
    if args.csv_output:
        store_csv(users, args.csv_output)

    print('[*] Found %d comments and %d reactions from %d unique users ' %
          (len(commenters), len(reactions), len(users)))
    # BUGFIX: quit() tears down the geckodriver process as well; close() only
    # closed the current window and leaked the driver process.
    driver.quit()
# firefox driver: geckodriver.exe import requests from selenium import webdriver import time from selenium.webdriver.firefox.options import Options from bs4 import BeautifulSoup firefox_options = Options() firefox_options.add_argument('--headless') firefox_options.add_argument('--disable-gpu') def save_pdf(href): try: root = './/download//' kv = {'user-agent': 'Mozilla/5.0'} print(href) r = requests.get(href, headers=kv) path = root + href.split('/')[-1] with open(path, 'wb') as f: f.write(r.content) f.close() print("一份pdf") except: return "" # firefox_options.binary_location = r'C:\ProgramData\Anaconda3\Scripts\geckodriver.exe' # chrome_options.binary_location = '/opt/google/chrome/chrome' driver = webdriver.Firefox(
import sys import io from selenium import webdriver from selenium.webdriver.firefox.options import Options import time sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8') sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8') firefox_option = Options() firefox_option.add_argument("--headless") #CLI driver = webdriver.Firefox( firefox_options=firefox_option, executable_path=r'D:/atom_python/section3/webdriver/firefox/geckodriver.exe' ) driver.get("https://google.com") driver.save_screenshot( "D:/atom_python/section3/webdriver/firefox/website2.png")
import sys from selenium import webdriver from selenium.webdriver.common.by import By from selenium.common.exceptions import TimeoutException from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0 from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0 from selenium.webdriver.firefox.options import Options inputfile = sys.argv[1] # presumbaly you have a list of input data you want to enter into the website to generate data output outputfile = "output_"+inputfile datafile = open(inputfile) outfile = open(outputfile,'w+') eachline = datafile.readline() options = Options() options.add_argument("--headless") driver = webdriver.Firefox(firefox_options=options) while eachline != '': inputdata = eachline.rstrip() #print postalcode #options = Options() #options.add_argument("--headless") #driver = webdriver.Firefox(firefox_options=options) # go to the google home page driver.get("website url you want to scrap") driver.find_element_by_id('txtSearch').send_keys("markup key to the data you want") # output data format driver.find_element_by_css_selector('input[type=\"button\"]').click() # virtually "clicking" button try: WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'tabResults')))
def main(user, password):
    """Log in to zulutrade.com and scrape every listed trader into a CSV.

    Flow: configure a headless Firefox profile that auto-downloads Excel
    exports, log in with the given credentials, expand the trader list,
    then for each trader row open the detail page in a second tab, collect
    its data (plus the exported Excel filename) and append one CSV row.

    Args:
        user: zulutrade login name.
        password: zulutrade password.

    Raises:
        LoginException: when the post-login element never appears.
        Exception: when the trader list or a detail page fails to load.
    """
    urlRoot = "https://es.zulutrade.com"
    urlLogin = "******"
    urlToScrap = "https://es.zulutrade.com/traders"
    columnsFile = "ubicationColumns.json"
    # NOTE(review): machine-specific Firefox binary path.
    firefoxDirectory = r'D:\Navegadores\Mozilla Firefox\firefox.exe'
    today = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d')
    createTodayDirectory(today)
    outputFile = "zulutrade_" + today + ".csv"
    columnsJson = getColumns(columnsFile)
    writeHeaderFile(outputFile, columnsJson["Columns"])
    options = Options()
    options.add_argument("--headless")
    # Profile: disable multi-process tabs and make downloads silent/automatic.
    profile = webdriver.FirefoxProfile()
    profile.set_preference("dom.disable_beforeunload", True)
    profile.set_preference("browser.tabs.remote.autostart", False)
    profile.set_preference("browser.tabs.remote.autostart.1", False)
    profile.set_preference("browser.tabs.remote.autostart.2", False)
    profile.set_preference("browser.tabs.remote.force-enable", False)
    profile.set_preference('browser.download.folderList', 2)  # custom location
    profile.set_preference('browser.download.manager.showWhenStarting', False)
    profile.set_preference('browser.download.dir', os.getcwd() + '\\' + today)
    profile.set_preference(
        'browser.helperApps.neverAsk.saveToDisk',
        "application/xml,text/xml,application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream"
    )
    profile.set_preference(
        "browser.helperApps.neverAsk.openFile",
        "application/xml,text/xml,application/csv,application/excel,application/vnd.msexcel,application/vnd.ms-excel,text/anytext,text/comma-separated-values,text/csv,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/octet-stream"
    )
    profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    profile.set_preference("browser.download.manager.useWindow", False)
    profile.set_preference("browser.download.manager.focusWhenStarting", False)
    profile.set_preference("browser.download.manager.alertOnEXEOpen", False)
    profile.set_preference("browser.download.manager.showAlertOnComplete", False)
    profile.set_preference("browser.download.manager.closeWhenDone", True)
    binary = FirefoxBinary(firefoxDirectory)
    driver = webdriver.Firefox(firefox_options=options,
                               firefox_profile=profile,
                               firefox_binary=binary)
    #driver = webdriver.Firefox(firefox_profile = profile,firefox_binary=binary)
    #driver = webdriver.Firefox(firefox_profile = profile)
    driver.get(urlLogin)
    userElement = driver.find_element_by_id("main_tbUsername")
    passwordElement = driver.find_element_by_id("main_tbPassword")
    userElement.send_keys(user)
    passwordElement.send_keys(password)
    driver.find_element_by_id("main_btnLogin").click()
    delayLogin = 30  #seconds
    delay = 90  #seconds
    # Wait for the logged-in header to confirm the login succeeded.
    try:
        element = WebDriverWait(driver, delayLogin).until(
            EC.presence_of_element_located((By.ID, 'user-top-container')))
    except TimeoutException:
        print("Se excedió el tiempo de espera")
        driver.quit()
        raise LoginException()
    driver.get(urlToScrap)
    try:
        element = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located(
                (By.XPATH, '//zl-load-more/button')))
    except TimeoutException:
        print("Se excedió el tiempo de espera")
        driver.quit()
        raise Exception()
    moreDetailElement = driver.find_elements_by_xpath(
        "//zl-performance/div/div/div/div/button")
    print(len(moreDetailElement))
    moreDetailElement[0].click()
    # Click "load more" up to 59 times to expand the full trader list.
    for i in range(59):
        print("Page: " + str(i))
        try:
            element = WebDriverWait(driver, delayLogin).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//zl-load-more/button')))
        except TimeoutException:
            print("Se excedió el tiempo de espera del boton de Cargar mas")
            break
        if len(driver.find_elements_by_xpath("//zl-load-more/button")) > 0:
            downloadMoreElement = driver.find_element_by_xpath(
                "//zl-load-more/button")
            downloadMoreElement.click()
        else:
            break
        #sleep(4.5)
    rowsElements = driver.find_elements_by_xpath(
        "//zl-performance-forex-list/div/table/tbody")
    print(len(rowsElements))
    #badgesElements = driver.find_elements_by_xpath("//zl-trader-badge")
    #print(len(badgesElements))
    # One iteration per trader row: collect grid data, badges, then detail page.
    for iRowElement in range(len(rowsElements)):
        print(iRowElement)
        rowData = getDataPerTrader(rowsElements[iRowElement],
                                   columnsJson["UbicationsGrid"])
        '''
        numElements = len(badgesElements[iRowElement].find_elements_by_xpath(".//ngl-icon[@ng-reflect-set-icon='icon-badge-partially-verified' or @ng-reflect-set-icon='icon-badge-fully-verified']"))
        print(numElements)
        if numElements > 0:
            print("Si hay elemento Check")
            checkIconElement = badgesElements[iRowElement].find_elements_by_xpath(".//ngl-icon[@ng-reflect-set-icon='icon-badge-partially-verified' or @ng-reflect-set-icon='icon-badge-fully-verified']")[numElements - 1]
            driver.execute_script("arguments[0].scrollIntoView();", rowsElements[iRowElement])
            hover = ActionChains(driver).move_to_element(checkIconElement)
            hover.perform()
            sleep(2)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            popUpElement = soup.find("zl-trader-verification-popover")
            #print(popUpElement)
            #To get lost of Focus of the little windows to iterate the next row
            hover = ActionChains(driver).move_to_element(badgesElements[iRowElement])
            hover.perform()
            sleep(1)
        '''
        # Badge flags are detected by searching the badge icon markup for each
        # configured icon name.
        badgesElementsHTML = rowsElements[iRowElement].find_element_by_xpath(
            ".//zl-trader-badge").get_attribute('innerHTML')
        for badge, item in columnsJson["UbicationsBadges"].items():
            rowData[badge] = item["ICON"] in badgesElementsHTML
        #open tab
        driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 't')
        print(driver.window_handles)
        driver.switch_to.window(driver.window_handles[1])
        driver.get(rowData["Url"])
        try:
            element = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//zl-timeframes/ngl-picklist/div/button')))
        except TimeoutException:
            print("Se excedió el tiempo de espera")
            driver.quit()
            raise Exception()
        rowData = getDataInsidePagePerTrader(rowData, driver,
                                             columnsJson["UbicationsInside"])
        # Switch the performance chart to its widest timeframe before export.
        graphicTimeElement = driver.find_element_by_xpath(
            "//zl-timeframes/ngl-picklist/div/button")
        graphicTimeElement.click()
        graphicTotalTimeElements = driver.find_elements_by_xpath(
            "//zl-timeframes/ngl-picklist/div/div/ul/li")
        graphicTotalTimeElements[len(graphicTotalTimeElements) - 1].click()
        excelFilename = "No hay archivo Excel disponible"
        if len(
                driver.find_elements_by_xpath(
                    "//zl-trading-history-excel-export/span/button")) > 0:
            exportExcelElement = driver.find_element_by_xpath(
                "//zl-trading-history-excel-export/span/button")
            exportExcelElement.click()
            exportExcel2007Elements = driver.find_elements_by_xpath(
                "//zl-trading-history-excel-export/span/div/ul/li")
            exportExcel2007Elements[0].click()
            # NOTE(review): fixed 3s sleep assumes the download finished; the
            # newest file in today's folder is taken as the export.
            sleep(3)
            excelFilename = getLastFilename(os.getcwd() + '\\' + today)
        rowData["Excel"] = excelFilename
        print(rowData)
        dfTraders = pd.DataFrame(rowData,
                                 columns=columnsJson["Columns"],
                                 index=[0])
        with open(outputFile, "a") as f:
            dfTraders.to_csv(f,
                             header=None,
                             index=False,
                             encoding='ISO-8859-1',
                             sep='|')
        # close the tab
        driver.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
        driver.switch_to.window(driver.window_handles[0])
    driver.quit()
def dictwebpage(request):
    """Django view: resolve a dictionary lookup for a word (or sentence).

    Depending on a one-character prefix of the configured dictionary URL
    ('^', '!', '#' or none), the looked-up page is fetched server-side,
    scraped with Selenium, passed through a site-specific parser, or simply
    returned as a URL for the client's iframe.
    """
    # case 1: (OUTDATED since Glosbe doesn't work anymore) It needs to fetch an API.
    # AJAX has fetched the JSON on the wwww, then
    # the JSON obj from the www is sent to the view dictwebpage which processes it and
    # and sends back html.
    if 'json_obj' in request.GET.keys():
        parsed_json_obj = json.loads(request.GET['json_obj'])
        return render(request, 'lwt/_glosbe_api.html',
                      {'result': parsed_json_obj})
    # case 2: AJAX sends the link to process to the view dictwebpage,
    # and the view sends backs a JSON containing the string URL. <iframe> displays it then.
    else:
        word = request.GET['word']
        word_escaped = urllibparsequote(word)
        wbl = request.GET['wbl']
        # case where it's a lookup sentence:
        if 'issentence' in request.GET.keys() and request.GET[
                'issentence'] != '':  # no key "issentence" is sent if the value of 'issentence' is empty in AJAX
            wo_id = int(request.GET['issentence'])
            word = Sentences.objects.values_list(
                'sentencetext',
                flat=True).get(sentence_having_this_word=wo_id)
            word_escaped = urllibparsequote(word)
        # Build the final lookup URL by substituting the word placeholder.
        finalurl = wbl.replace('<WORD>', word_escaped)
        # finalurl = createTheDictLink(wbl, word)  # create the url of the dictionary, integrating the searched word
        # case where we can't put the url in an iframe src. we must request the entire html webpage
        # and will display it in the iframe srcdoc
        if finalurl[0] == '^' or finalurl[
                0] == '!':  # case where we open into the frame
            # try:  # check that the URL is working. else display a well-formed error
            headers = {
                "User-Agent":
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)",
                "Accept":
                "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,*/*;q=0.8"
            }
            reqest = Request(finalurl[1:], headers=headers)
            try:
                content = urlopen(reqest)
            # catch the redirect from Wiktionary
            except urllib.error.HTTPError as httpError:
                error = httpError.read().decode()
                # wiktionary has a special way to redirect to similar word if nothing found
                if 'wiktionary' in finalurl:
                    redirect_url = _wiki_API_redirect(error, finalurl[1:],
                                                      word_escaped)
                    reqest = Request(redirect_url, headers=headers)
                    try:
                        content = urlopen(reqest)
                    except:
                        content = error  # redirect doesn't work neither, so display the error
                else:
                    content = error
            if finalurl[0] == '^':
                # '^': scrape and clean the fetched page, return escaped HTML
                # for the client's iframe srcdoc.
                try:
                    soup = BeautifulSoup(content, 'html.parser')
                    html = _clean_soup(soup, finalurl)
                except:
                    html = render_to_string('lwt/dictwebpage_not_working.html')
                result_str = escape(html)
                return HttpResponse(json.dumps(result_str))
        if finalurl[
                0] == '#':  # case where we use Selenium (Tricky website where scrapping is bloked)
            # detect if mac or else
            system = platform.system().lower()
            if system == 'windows' or system == 'linux':
                is_Mac = False
            else:
                is_Mac = True
            # Imports are local so Selenium is only loaded for this rare path.
            from selenium.webdriver.firefox.webdriver import WebDriver
            from functional_tests.selenium_base import Base
            from selenium.webdriver.common.by import By
            from selenium.webdriver.firefox.options import Options as FirefoxOptions
            options = FirefoxOptions()
            options.add_argument("--headless")
            selenium = WebDriver(options=options)
            selenium.get('{}'.format(finalurl[1:]))
            base = Base()
            base.selenium = selenium
            if 'naver' in finalurl:
                base.wait_until_appear(By.ID, 'searchPage_entry')
            # Grab the fully-rendered DOM for the parsers below.
            content = selenium.execute_script(
                "return document.documentElement.outerHTML;")
            if 'naver' in finalurl:
                translation_result = _naver_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'naver'
                }
                context['is_Mac'] = is_Mac
                return render(request, 'lwt/_translation_api.html', context)
        if finalurl[0] == '!' or finalurl[
                0] == '#':  # this dictionary uses my custom APIs (for ex. Google translate)
            context = {}
            # detect if mac or else
            system = platform.system().lower()
            if system == 'windows' or system == 'linux':
                is_Mac = False
            else:
                is_Mac = True
            # Each supported site gets its own parser; 'content' was produced
            # by the fetch ('!') or Selenium ('#') branch above.
            if 'https://translate.google.com' in finalurl:
                translation_result = _google_API(content)
                context = {
                    'url': finalurl[1:],
                    'url_name': 'Google Translate',
                    'trans_item_nb': len(translation_result),
                    'translation_result': translation_result,
                    'word_OR_sentence_origin': word,
                    'is_Mac': is_Mac
                }
                return render(request, 'lwt/_google_api.html', context)
            if 'pons.com/translate' in finalurl:
                translation_result = _pons_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'pons'
                }
            if 'dict.cc' in finalurl:
                translation_result = _dictcc_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'dictcc'
                }
            if 'wordref' in finalurl:
                translation_result = _wordref_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'wordref'
                }
            if 'wiktionary' in finalurl:
                translation_result = _wiki_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'wiki'
                }
            if 'youdao' in finalurl:
                translation_result = _youdao_API(content, finalurl)
                context = {
                    'translation_result': translation_result,
                    'API_name': 'youdao'
                }
            context['is_Mac'] = is_Mac
            return render(request, 'lwt/_translation_api.html', context)
        return HttpResponse(
            json.dumps(finalurl))  # case where we open into a new window
messageless_log_path = "../../newwebdata/messagelog1.txt" if not os.path.isdir(savepath): os.mkdir(savepath) logfile = open(logpath,'a+') def makelog(logmessage): logfile.write(logmessage + '\n') messagelogfile = open(messageless_log_path,'a+') def messagelesslog(logmessage): messagelogfile.write(logmessage + '\n') # option = webdriver.ChromeOptions() option = Options() option.add_argument('--no-sandbox') option.add_argument('--disable-dev-shm-usage') option.add_argument('--headless') #静默运行 option.add_argument('log-level=3') option.add_argument('--disable-gpu') # 禁用GPU加速,GPU加速可能会导致Chrome出现黑屏,且CPU占用率高达80%以上 browser = webdriver.Firefox(options=option) # browser = webdriver.Chrome(options=option) browser.implicitly_wait(time_limit) browser.set_page_load_timeout(time_limit) # 查询网址,爬取内容 # def requesturl(url, savefilepath): def requesturl(url): print(url) webinfo={} #最后保存的数据 webtext = [] #首页内容文本
def init_browser(): global browser options = Options() options.add_argument('--headless') browser = webdriver.Firefox(options=options)
def parse_page(self, response):
    """Scrape Hanover warehouse stock levels behind a login form and sync
    them into the mmc_stocks table.

    Starts a virtual display + headless Firefox, logs in with fixed
    warehouse credentials, pages through the inventory table, and for each
    SKU row upserts the quantity. Sets mmc_spider_status to 3 on success or
    2 (with the error text) on failure.

    NOTE(review): credentials are hardcoded; `self.db_cur.fetchone` below is
    referenced without calling it (missing parentheses) — rowcount is what
    is actually used.
    """
    try:
        from pyvirtualdisplay import Display
        display = Display(visible=0, size=(800, 800))
        display.start()
        firefox_options = Options()
        firefox_options.add_argument('-headless')
        firefox_options.add_argument('--disable-gpu')
        driver = webdriver.Firefox(firefox_options=firefox_options,
                                   executable_path=settings.FIREFOX_PATH)
        driver.get(response.url)
        driver.implicitly_wait(100)
        # Login form fields (find_elements: empty list when a field is absent).
        elem_code = driver.find_elements_by_id('WarehouseCode')
        elem_acode = driver.find_elements_by_id('AccountCode')
        elem_name = driver.find_elements_by_id('UserName')
        elem_pass = driver.find_elements_by_id('Password')
        btn_login = driver.find_elements_by_css_selector('input[name="Login"]')
        if elem_code:
            elem_code[0].send_keys('03')
        if elem_acode:
            elem_acode[0].send_keys('001862')
        if elem_name:
            elem_name[0].send_keys('MAXLEAD')
        if elem_pass:
            elem_pass[0].send_keys('1202HXML')
        btn_login[0].click()
        driver.implicitly_wait(100)
        time.sleep(5)
        # Last token of the navigation label is the total page count.
        total_page = driver.find_elements_by_css_selector(
            '#navigationTR nobr')[0].text
        total_page = int(total_page.split(' ')[-1])
        for i in range(total_page):
            try:
                res = driver.find_elements_by_css_selector(
                    '#ViewManyListTable tr')
                elem = driver.find_element_by_id('MetaData')
                elem.click()
                res.pop(0)  # drop the header row
                for val in res:
                    td_re = val.find_elements_by_tag_name('td')
                    if td_re:
                        sku = td_re[0].text
                        warehouse = 'Hanover'
                        if td_re[3].text and not td_re[3].text == ' ':
                            qty = td_re[3].text
                            qty = qty.replace(',','')  # strip thousands separators
                        else:
                            qty = 0
                        # NOTE(review): SQL built by string interpolation from
                        # scraped text — SQL-injection risk; should use a
                        # parameterized query like the ones below.
                        qty_sql = "select id from mmc_stocks where commodity_repertory_sku='%s' and warehouse='%s'" % (
                            sku, warehouse)
                        self.db_cur.execute(qty_sql)
                        self.db_cur.fetchone
                        qty_re = self.db_cur.rowcount
                        values = (qty, sku, warehouse)
                        # Update existing stock row, otherwise insert a new one.
                        if qty_re > 0:
                            sql = "update mmc_stocks set qty=%s where commodity_repertory_sku=%s and warehouse=%s"
                        else:
                            sql = "insert into mmc_stocks (qty, commodity_repertory_sku, warehouse) values (%s, %s, %s)"
                        self.db_cur.execute(sql, values)
                if i < total_page:
                    elem_next_page = driver.find_elements_by_id('Next')
                    if elem_next_page:
                        elem_next_page[0].click()
                        driver.implicitly_wait(100)
            except:
                # Best-effort: skip a broken page and keep going.
                continue
        self.conn.commit()
        sql = "update mmc_spider_status set status=3, description='' where warehouse='Hanover'"
        self.db_cur.execute(sql)
        self.conn.commit()
    except Exception as e:
        # Record the failure reason in the spider-status table.
        values = (str(e),)
        sql = "update mmc_spider_status set status=2, description=%s where warehouse='Hanover'"
        self.db_cur.execute(sql, values)
        self.conn.commit()
    # Dismiss any leave-page alert before shutting everything down.
    try:
        driver.refresh()
        driver.switch_to.alert.accept()
        driver.implicitly_wait(100)
    except:
        pass
    display.stop()
    driver.quit()
class mask():
    '''
    Mask object: wraps a Firefox/TOR browsing identity — private-mode
    options, SOCKS-proxy profile pointed at a local TOR instance, random
    user-agent strings, and identity rotation via the TOR control port.
    '''

    def __init__(self, path, g_prefix, b_prefix):
        # path: directory containing the start-tor-browser launcher.
        # g_prefix / b_prefix: "good"/"bad" message prefixes for console output.
        self.g_prefix = g_prefix
        self.b_prefix = b_prefix
        self.path = path
        self.opts = Options()
        self.opts.add_argument('-private')

    def _check_tor(self):
        '''
        Checks for running TOR browser
        Only works on Linux based systems
        @param none
        @return boolean status
        '''
        # A listener on port 9150 (TOR's SOCKS port) means TOR is up;
        # os.system returns non-zero when grep finds nothing.
        CMD = "netstat -ano | grep LISTEN | grep 9150 > /dev/null 2>&1"
        if (os.system(CMD) > 0):
            return False
        else:
            return True

    def _start_tor(self):
        '''
        Start TOR browser
        @param none
        @return boolean status
        '''
        CMD = "start-tor-browser"
        try:
            p = subprocess.Popen(self.path + CMD)
        except:
            return False
        while True:
            # Give TOR browser time to open
            if self._check_tor():
                break
            else:
                time.sleep(2)
        return True

    def _get_ua(self):
        '''
        Get random user agent string
        @param none
        @return String
        '''
        ua = [
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
            "Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
            "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1",
            "Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G570Y Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 5.0; SAMSUNG SM-N900 Build/LRX21V) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/34.0.1847.76 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-N910F Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/4.0 Chrome/44.0.2403.133 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; U; Android-4.0.3; en-us; Galaxy Nexus Build/IML74K) AppleWebKit/535.7 (KHTML, like Gecko) CrMo/16.0.912.75 Mobile Safari/535.7",
            "Mozilla/5.0 (Linux; Android 7.0; HTC 10 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36",
        ]
        return ua[randint(0, (len(ua) - 1))]

    def _get_tor_browser_profile(self):
        '''
        Get TOR browser profile
        @param none
        @return selenium webdriver profile routing traffic through the
        local TOR SOCKS proxy, with private browsing enabled
        '''
        profile = webdriver.FirefoxProfile()
        proxyIP = "127.0.0.1"
        proxyPort = 9150
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.socks", proxyIP)
        profile.set_preference("network.proxy.socks_port", int(proxyPort))
        # Resolve DNS through the proxy too, so lookups don't leak.
        profile.set_preference("network.proxy.socks_remote_dns", True)
        profile.set_preference("browser.privatebrowsing.autostart", True)
        return profile

    def _get_proxy_list(self):
        ##############################################################################
        ####################### INCOMPLETE ##########################################
        #############################################################################
        '''
        Get up-to-date list of free proxy server IP:PORT,
        return it as a list to cycle through.
        OPTIONS: Return the list of proxies, or return
        a webdriver using each proxy.... Not sure yet
        '''
        URI = "https://free-proxy-list.net/"

    def get_tor_browser(self):
        '''
        Get firefox browser using TOR proxy
        @param none
        @return selenium webdriver (None if TOR could not be started)
        '''
        if not self._check_tor():
            print(self.g_prefix + "TOR not running, starting TOR")
            time.sleep(2)
            if not self._start_tor():
                print(self.b_prefix + "Could not start TOR browser")
                return None
            else:
                print(self.g_prefix + "TOR started successfully")
        return webdriver.Firefox(self._get_tor_browser_profile(),
                                 firefox_options=self.opts)

    def swap_ident(self):
        '''
        Swap TOR browser identity
        @param none
        @return boolean status
        '''
        if self._check_tor():
            # Ask the TOR control port for a fresh circuit/identity.
            with Controller.from_port(port=9151) as controller:
                controller.authenticate()
                controller.signal(Signal.NEWNYM)
            time.sleep(1)  # Give the identity time to reset
            return True
        return False
def generate_driver(self): options = FirefoxOptions() options.add_argument("-headless") return webdriver.Firefox(executable_path=self.execute_path, options=options)
""" Functional Tests for Superlists """ from selenium.webdriver import Firefox from selenium.webdriver.firefox.options import Options firefox_options = Options() firefox_options.add_argument('-headless') BROWSER = Firefox(firefox_options=firefox_options) BROWSER.get('http://localhost:8000') assert 'Django' in BROWSER.title
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import os
import config
import utils

# Geckodriver is expected to live in the current working directory.
dirpath = os.getcwd()

# Headless Firefox at a fixed 1920x1080 window size.
options = Options()
options.add_argument("--headless")
options.add_argument("--width=1920")
options.add_argument("--height=1080")


def type_in_origin(driver, origin):
    """Type `origin` into the origin input field (element id from config)."""
    form_origin = driver.find_element_by_id(config.origin_field)
    form_origin.send_keys(origin)


def type_in_destination(driver, destination):
    """Type `destination` into the destination input field (element id from config)."""
    form_destination = driver.find_element_by_id(config.destination_field)
    form_destination.send_keys(destination)


def navigate_to_result_page(journey):
    """Open the search page, fill in the journey's origin and destination,
    submit the form, then expand the result details.

    :param journey: mapping with 'origin' and 'destination' keys
    """
    driver = webdriver.Firefox(executable_path = dirpath + '/geckodriver',options = options)
    driver.get(config.url)
    type_in_origin(driver, journey.get('origin'))
    type_in_destination(driver, journey.get('destination'))
    btn_submit = driver.find_element_by_class_name(config.submit_button_class)
    btn_submit.click()
    btn_show_details = driver.find_element_by_id(config.details_button_id)
    btn_show_details.click()
import time, scrapy, hashlib from selenium import webdriver from lxml import etree from scrapy.selector import Selector from database.mongodb import MongoDB from selenium.webdriver.firefox.options import Options options = Options() options.add_argument('--headless') from items import eventItem, articleItem, hotwordItem, questionItem from selenium.common.exceptions import TimeoutException from scrapy.crawler import CrawlerProcess # driver = webdriver.Firefox() environment = 'local' db_name = 'dailypops' # class Question(scrapy.Spider): # name = 'question' # allowed_domain=[] # custom_settings = { # 'LOG_LEVEL': 'ERROR', # 'CONCURRENT_REQUESTS': 1, # 'DOWNLOAD_DELAY': 0.2, # 'CONCURRENT_REQUESTS_PER_DOMAIN': 1, # 'ITEM_PIPELINES': {'pipeline.pipeline.MongodbPipeline': 300}, # 'DOWNLOADER_MIDDLEWARES': {'middleware.middlewares.GoogleMiddleware': 400} # } # # def start_requests(): # # while True: # # time.sleep(6)
class HistoryDragonTigerList():
    """Scraper for the eastmoney.com "Dragon-Tiger" list (daily list of
    stocks with unusual trading activity)."""

    def __init__(self):
        # List holding the column titles
        self.title_list = []
        # Two-dimensional list holding the scraped stock rows
        self.row_list = []  # rows
        self.column_list = []  # one row's cells (columns)
        # self.driver = webdriver.Firefox()
        # self.driver.set_window_position(0, 0)
        # self.driver.set_window_size(1400, 900)
        # self.driver.maximize_window()  # maximize the window
        # The three lines below enable headless scraping (no browser window)
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.add_argument('--disable-gpu')
        self.driver = webdriver.Firefox(
            executable_path='geckodriver',
            options=self.options)  # first argument can be omitted when geckodriver is on PATH; otherwise pass an absolute path
        self.current_url = "http://data.eastmoney.com/stock/tradedetail.html"
        self.WAIT = WebDriverWait(self.driver, 15)

    def load_page_by_xpath(self, web_driver_wait, xpath_elem):
        """
        Wait (via selenium) until the element located by the XPath is present.
        :param web_driver_wait: WebDriverWait instance to use
        :param xpath_elem: XPath locator string
        :return: the located element
        """
        load_elem = web_driver_wait.until(
            EC.presence_of_element_located((By.XPATH, xpath_elem)))
        return load_elem

    def update_date_range(self, start_date, end_date):
        """
        Fill the page's start/end date inputs via JavaScript (the inputs are
        readonly, so the attribute is removed first) and trigger the query.
        :param start_date: 'yyyy-MM-dd'
        :param end_date: 'yyyy-MM-dd'
        :return: None
        """
        remove_start_date = 'document.getElementsByClassName("date-input")[0].removeAttribute("readonly");'
        self.driver.execute_script(remove_start_date)
        add_start_date = 'document.getElementsByClassName("date-input")[0].value="' + start_date + '"'
        self.driver.execute_script(add_start_date)
        remove_end_date = 'document.getElementsByClassName("date-input")[1].removeAttribute("readonly");'
        self.driver.execute_script(remove_end_date)
        add_end_date = 'document.getElementsByClassName("date-input")[1].value="' + end_date + '"'
        self.driver.execute_script(add_end_date)
        query_elem = '//*[@id="divSjri"]/div[2]/div[2]'
        self.load_page_by_xpath(self.WAIT, query_elem).click()
        time.sleep(10)
        print("时间修改完毕" + start_date + "\t" + end_date)

    def analysis_page_source(self, html, filename):
        """
        Parse one result page and append its rows to `filename`.
        :param filename: output file path
        :param html: page source HTML
        :return: None
        """
        soup = BeautifulSoup(html, 'lxml')
        date_elem = soup.select_one("#search_date_start")
        year = str(date_elem.attrs["value"]).replace('-', '')[0:4]
        # Collect the header (title) row
        title_items = soup.select_one('#tab-1 > thead').find("tr").find_all(
            "th")
        for item in title_items:
            if item.text != '相关':
                self.title_list.append(str(item.text).strip())
        self.title_list.append("星期")
        Utils.Utils.print_title(self.title_list)
        self.title_list.clear()
        # Collect the stock data rows
        tr_elems = soup.select("#tab-1 > tbody > tr")
        for tr_elem in tr_elems:
            td_items = tr_elem.select("td")
            td_size = len(td_items)
            for i in range(0, td_size):
                if i == 17:
                    # Column 17 keeps its full text in the span's title attr
                    self.column_list.append(
                        str(td_items[i].select_one(
                            'span').attrs['title']).strip())
                elif i == 4:
                    # Column 4 is "MM-dd"; prepend the year -> yyyyMMdd
                    month_day = str(td_items[i].text).strip()
                    dt = year + month_day[0:2] + month_day[3:5]
                    self.column_list.append(dt)
                    weekday = Utils.Utils.date2weekday(dt)
                elif i != 3:
                    self.column_list.append(str(td_items[i].text).strip())
            self.column_list.append(weekday)
            self.row_list.append(self.column_list)
            self.column_list = []
        Utils.Utils.save_file(filename, self.row_list, 'a')
        self.row_list.clear()

    def get_current_window(self):
        """
        Switch the driver to a newly opened window/tab, if any.
        :return: None
        """
        time.sleep(10)
        # Handle of the current window
        current_window = self.driver.current_window_handle
        # Handles of all open windows
        all_Handles = self.driver.window_handles
        # If a handle is not the current one, switch to it
        for new_window in all_Handles:
            if new_window != current_window:
                self.driver.switch_to.window(new_window)
        # Implicit wait: the page's JavaScript needs time to run — too short
        # and data is missed, too long wastes time; implicitly_wait() waits
        # intelligently up to the given number of seconds.
        self.driver.implicitly_wait(15)

    def get_stock_data(self, path, start_date, end_date):
        """
        Scrape the Dragon-Tiger list for the date range and save it to a file.
        :param path: directory the output file is saved in
        :param start_date: yyyyMMdd
        :param end_date: yyyyMMdd
        :return: None
        """
        if not os.path.exists(path):
            os.makedirs(path)
        file_name = path + '/' + start_date + '-' + end_date
        if os.path.exists(file_name):
            print(file_name + " 文件已存在...\t退出")
            return
        self.driver.get(self.current_url)
        show_date_window_xpath = '//*[@id="divSjri"]/div[1]'
        self.load_page_by_xpath(self.WAIT, show_date_window_xpath).click()
        # Reformat yyyyMMdd -> yyyy-MM-dd for the page's date inputs
        start_date = start_date[0:4] + "-" + start_date[
            4:6] + "-" + start_date[6:8]
        end_date = end_date[0:4] + "-" + end_date[4:6] + "-" + end_date[6:8]
        self.update_date_range(start_date, end_date)
        self.get_current_window()
        # Determine the maximum page number
        max_page_elem_xpath = "//*[@id='PageCont']/a[last()-2]"
        max_page_elem = self.load_page_by_xpath(self.WAIT, max_page_elem_xpath)
        max_page = str(max_page_elem.text).strip()
        print("最大页面", max_page)
        if max_page.__eq__("..."):
            # Pager is elided ("..."): click through to reveal the real
            # last-page number, then return to the first page.
            max_page_elem.click()
            self.get_current_window()
            max_page_elem_xpath = "//*[@id='PageCont']/child::node()[last()-4]"
            max_page_elem = self.load_page_by_xpath(self.WAIT,
                                                    max_page_elem_xpath)
            max_page_elem.click()
            max_page = str(max_page_elem.text).strip()
            print("最大页面", max_page)
            first_page_elem_xpath = "//*[@id='PageCont']/a[2]"
            self.load_page_by_xpath(self.WAIT, first_page_elem_xpath).click()
        max_page_num = int(max_page)
        for i in range(0, max_page_num):
            print("第", i + 1, "页")
            self.get_current_window()
            html = self.driver.page_source
            self.analysis_page_source(html, file_name)
            # Get the "next page" element
            next_page = self.driver.find_element_by_xpath(
                "//*[@id='PageCont']/a[last()-1]")
            next_page.click()

    def get_history_dragon_tiger_list(self, start_date, end_date):
        """
        Public entry point; always quits the browser when done.
        :param start_date: yyyyMMdd
        :param end_date: yyyyMMdd
        :return: None
        """
        try:
            path = Utils.Utils.get_stock_data_path(
            ) + '/history_dragon_tiger_list'
            self.get_stock_data(path, start_date, end_date)
        finally:
            self.driver.quit()
def start():
    """Endless worker loop: pull proxy IPs from redis and visit the target
    URL through each usable proxy with a headless Firefox instance."""
    browser = None
    while True:
        # Proxies
        try:
            ips = redisClient.getProxyData()
            for ip, status in ips.items():
                ip = str(ip, encoding="utf-8")
                if common.isUseIp(ip):
                    urls = [
                        "https://cn.iac-worldwide.com/api.php/Home/Taskdetail/index/if_id/826/sharefrom/8198"
                    ]
                    for url in urls:
                        flag = redisClient.isExistsStartIP(url, ip)
                        if (flag == False):
                            try:
                                redisClient.setUseStart(url, ip)
                                options = Options()
                                options.add_argument('-headless')
                                # NOTE(review): this profile is created but
                                # never passed to webdriver.Firefox below —
                                # verify whether it is actually needed.
                                profile = webdriver.FirefoxProfile()
                                ip_ip = ip.split(":")[0]
                                ip_port = int(ip.split(":")[1])
                                options.set_preference(
                                    'network.proxy.type',
                                    1)  # default is 0 (direct connection); 1 means manual proxy configuration.
                                options.set_preference('network.proxy.http',
                                                       ip_ip)
                                options.set_preference(
                                    'network.proxy.http_port', ip_port)
                                options.set_preference('network.proxy.ssl',
                                                       ip_ip)
                                options.set_preference(
                                    'network.proxy.ssl_port', ip_port)
                                # Disable every cache so each proxy gets a
                                # fresh page load.
                                options.set_preference(
                                    "network.http.use-cache", False)
                                options.set_preference(
                                    "browser.cache.memory.enable", False)
                                options.set_preference(
                                    "browser.cache.disk.enable", False)
                                options.set_preference(
                                    "browser.sessionhistory.max_total_viewers",
                                    3)
                                options.set_preference(
                                    'permissions.default.image', 2)
                                # Disable Flash
                                options.set_preference(
                                    'dom.ipc.plugins.enabled.libflashplayer.so',
                                    'false')
                                # Firefox browser
                                browser = webdriver.Firefox(
                                    executable_path='geckodriver',
                                    firefox_options=options)
                                browser.set_page_load_timeout(20)
                                browser.get(url)
                                print(url)
                                praisebg = WebDriverWait(browser, 10).until(
                                    EC.presence_of_element_located(
                                        (By.ID, "praisebg")))
                                redisClient.setUseIP(ip)
                            except Exception as e:
                                print(e)
                                print("浏览url出错")
                                break
                            finally:
                                # Always close the browser for this attempt
                                if (browser != None):
                                    browser.close()
                else:
                    # Proxy is unusable: drop it from redis
                    redisClient.deleteProxyData(ip)
        except Exception as e:
            print(e)
            continue
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.common.keys import Keys
import sys

# macos_firefox.py
# Equivalent manual launch:
# /Applications/Firefox.app/Contents/MacOS/firefox --private-window https://www.uol.com.br

# Options: private window, verbose driver logging, accept untrusted certs,
# explicit path to the macOS Firefox binary.
firefox_options = Options()
firefox_options.log.level = 'debug'
firefox_options.add_argument('-private')
firefox_options.accept_untrusted_certs = True
firefox_options.assume_untrusted_cert_issuer = True
firefox_options.binary_location = '/Applications/Firefox.app/Contents/MacOS/firefox'
# firefox_options.headless = True

# FirefoxProfile: private browsing on, PDF viewer off, downloads reuse the
# last-used folder, tab/fullscreen animations and close warnings off,
# geolocation and search suggestions off.
firefox_profile = FirefoxProfile();
firefox_profile.set_preference('browser.privatebrowsing.autostart', True)
firefox_profile.set_preference('pdfjs.disabled', True)
firefox_profile.set_preference('browser.download.folderList', 2)
firefox_profile.set_preference('browser.download.panel.shown', False)
firefox_profile.set_preference('browser.tabs.warnOnClose', False)
firefox_profile.set_preference('browser.tabs.animate', False)
firefox_profile.set_preference('browser.fullscreen.animateUp', 0)
firefox_profile.set_preference('geo.enabled', False)
firefox_profile.set_preference('browser.urlbar.suggest.searches', False)
firefox_profile.set_preference('browser.tabs.warnOnCloseOtherTabs', False)
firefox_profile.update_preferences()
accuracy is very low.. '''
from selenium.webdriver.firefox.options import Options
from selenium import webdriver
import os
import sys
from time import strftime,sleep

# Headless Firefox; point user-data-dir next to this script so the
# WhatsApp Web session can persist between runs.
options = Options()
options.headless = True
options.add_argument("user-data-dir="+os.path.dirname(sys.argv[0]))
driver = webdriver.Firefox(options=options)
driver.get("http://web.whatsapp.com")
# Save the login QR code to qr.png so the user can scan it with a phone.
with open('qr.png', 'wb') as file:
    file.write(driver.find_element_by_xpath('/html/body/div[1]/div/div/div[2]/div[1]/div/div[2]/div/img').screenshot_as_png)
name1=input("Please Enter First Person Name : ")
name2=input("Please Enter Second Person Name : ")
# Per-person time values in "MM:SS" form — presumably online-time budgets;
# verify against the rest of the script.
ot={name1:"60:00",name2:"50:30"}
check=False

def track(name):
def scrape(curr_url, hash, soup, results): print('Found elcorreodeespana.com...') # article for t in soup.find_all('article', id='article'): print('Getting wordpress article...') dt = {} dm = {} dm["id"] = str(hash) dm["type"] = 'article' dm["source"] = curr_url dm["meta"] = '' for c in t.find_all('div', class_='post-headbar'): dm["meta"] = dm["meta"] + utils.clean_soup(c) + ' ' dm["title"] = '' for c in t.find_all('h1', class_='post-title'): dm["title"] = dm["title"] + utils.clean_soup(c) + ' ' dt["meta"] = dm dt["text"] = '' for c in t.find_all('div', class_='post-content'): dt["text"] = dt["text"] + utils.clean_soup(c) + ' ' result = json.dumps(dt, ensure_ascii=False) results.append(result) print(result) # comments if len(soup.find_all('div', id='disqus_thread')) > 0: print('Getting disqus comments...') options = FirefoxOptions() options.add_argument("--headless") driver = webdriver.Firefox(options=options) driver.implicitly_wait(5) try: driver.get(curr_url) driver.execute_script("document.getElementById('disqus_thread').scrollIntoView();setTimeout(function(){},2000);") WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'disqus_thread'))) for i in driver.find_elements_by_tag_name('iframe'): if i.get_attribute('src').find('disqus.com/embed') >= 0: driver.get(i.get_attribute('src')) WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CLASS_NAME, 'post-message'))) content = driver.page_source break except: # print('webdriver timeout... ') driver.close() try: for t in BeautifulSoup(content, "html.parser").find_all('div', class_='post-message'): dt = {} dm = {} dm["id"] = str(hash) dm["type"] = 'comment' dm["source"] = curr_url dt["meta"] = dm dt["text"] = utils.clean_soup(t) result = json.dumps(dt, ensure_ascii=False) results.append(result) print(result) except: # print('webdriver empty...')
from selenium.webdriver.firefox.options import Options

# Directory of this test file and the multiscanner working directory.
CWD = os.path.dirname(os.path.abspath(__file__))
MS_WD = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# get the flask web app
from multiscanner.web.app import app as flask_app

# Bypass any configured HTTP(S) proxies for local requests.
proxies = {
    "http": None,
    "https": None,
}

# Start a headless Firefox; if that fails (no geckodriver/Firefox on this
# machine) mark the whole module as skipped rather than erroring out.
try:
    opts = Options()
    opts.add_argument('-headless')
    driver = webdriver.Firefox(firefox_options=opts)
except Exception as e:
    pytestmark = pytest.mark.skip

# Fixture data for the submission form.
test_submitter_name = 'John Doe'
test_submitter_email = '*****@*****.**'
test_submitter_org = 'Testers'
test_submitter_phone = '123-456-7890'
test_submission_desc = 'A test document submission'


class TestBase(LiveServerTestCase):
    @classmethod
    def setup_class(cls):
        cls.mock_server_port = get_free_server_port()
def initBrowser():
    """Launch a headless Firefox browser and store it in the module-global
    `browser`.

    Fix: pass the options via `options=` — the `firefox_options=` keyword
    was deprecated and removed in Selenium 4.
    """
    options = Options()
    options.add_argument('-headless')
    global browser
    browser = webdriver.Firefox(options=options)
def scrape():
    """Drive Firefox through the URE real-estate search form and collect the
    HTML of every results page.

    Window size, headless flag, search filters, and all element XPaths come
    from the `shared` module's CONFIG/PARAMS.

    :return: list of page-source strings, one per results page
    """
    options = Options()
    options.headless = shared.HEADLESS
    options.add_argument("--width=" + shared.PARAMS['scrape']['width'])
    options.add_argument("--height=" + shared.PARAMS['scrape']['height'])
    # Profile with all bundled extensions plus any configured preferences.
    firefox_profile = webdriver.FirefoxProfile()
    for file in os.listdir(shared.EXTENSIONS_DIR):
        firefox_profile.add_extension(
            extension=os.path.join(shared.EXTENSIONS_DIR, file))
    if 'firefox_profile_ure' in shared.CONFIG:
        for key, value in shared.CONFIG['firefox_profile_ure'].items():
            firefox_profile.set_preference(key, int(value))
    driver = webdriver.Firefox(
        firefox_profile=firefox_profile,
        options=options,
        executable_path=shared.DRIVER_PATH,
        service_log_path=shared.CONFIG['constant']['driver_log_file'])
    try:
        driver.set_page_load_timeout(
            int(shared.CONFIG['search']['driver_timeout']))
        driver.get(shared.PARAMS['scrape']['ure'])
        xpaths = shared.PARAMS['xpath']
        # Enter the search location and confirm with RETURN.
        shared.wait_for_element_visible(driver, xpaths['geolocation'])
        driver.find_element_by_xpath(xpaths['geolocation']).send_keys(
            shared.CONFIG['search']['geolocation'])
        shared.wait_for_element_visible(driver, xpaths['geolocation'])
        driver.find_element_by_xpath(xpaths['geolocation']).send_keys(
            Keys.RETURN)
        # Dismiss the cookie banner if it is present.
        results = driver.find_elements_by_xpath(xpaths['cookie_close_banner'])
        if results:
            driver.execute_script("arguments[0].click();", results[0])
        # Open the filter panel and fill in every configured filter.
        shared.wait_for_element_visible(driver, xpaths['filter'])
        filter_el = driver.find_element_by_xpath(xpaths['filter'])
        driver.execute_script("arguments[0].click();", filter_el)
        shared.wait_for_element_visible(driver, xpaths['min_price'])
        driver.find_element_by_xpath(xpaths['min_price']).send_keys(
            shared.CONFIG['search']['min_price'])
        shared.wait_for_element_visible(driver, xpaths['max_price'])
        driver.find_element_by_xpath(xpaths['max_price']).send_keys(
            shared.CONFIG['search']['max_price'])
        shared.wait_for_element_visible(driver, xpaths['bedrooms_dropdown'])
        Select(driver.find_element_by_xpath(
            xpaths['bedrooms_dropdown'])).select_by_visible_text(
                shared.CONFIG['search']['bedrooms_dropdown'])
        shared.wait_for_element_visible(driver, xpaths['bathrooms_dropdown'])
        Select(driver.find_element_by_xpath(
            xpaths['bathrooms_dropdown'])).select_by_visible_text(
                shared.CONFIG['search']['bathrooms_dropdown'])
        shared.wait_for_element(driver, xpaths['under_contract_checkbox'])
        under_contract_element = driver.find_element_by_xpath(
            xpaths['under_contract_checkbox'])
        driver.execute_script("arguments[0].click();", under_contract_element)
        shared.wait_for_element_visible(driver, xpaths['square_feet_dropdown'])
        sqft_el = driver.find_element_by_xpath(xpaths['square_feet_dropdown'])
        # Scroll the dropdown into view before selecting from it.
        driver.execute_script("arguments[0].scrollIntoView(true);", sqft_el)
        Select(sqft_el).select_by_visible_text(
            shared.CONFIG['search']['square_feet_dropdown'])
        shared.wait_for_element_visible(driver, xpaths['acres_dropdown'])
        Select(driver.find_element_by_xpath(
            xpaths['acres_dropdown'])).select_by_visible_text(
                shared.CONFIG['search']['acres_dropdown'])
        # Apply the filters and wait for the results (and spinner) to settle.
        shared.wait_for_element_visible(driver, xpaths['update_search'])
        update_search = driver.find_element_by_xpath(xpaths['update_search'])
        driver.execute_script("arguments[0].click();", update_search)
        shared.wait_for_element_visible(driver, xpaths['results_listings'])
        shared.wait_for_invisible(driver, xpaths['results_spin_wrap'])
        # Collect the first page, then click "next" until no next page exists.
        page_sources = [driver.page_source]
        result = get_next(driver)
        while result:
            driver.execute_script("arguments[0].click();", result)
            shared.wait_for_element_visible(driver, xpaths['results_listings'])
            page_sources.append(driver.page_source)
            result = get_next(driver)
    finally:
        driver.quit()
    return page_sources
def headless_firefox_driver(self, driverpath): options = FirefoxOptions() options.add_argument('--headless') self.__browser = wd.Firefox(executable_path=driverpath, options=options)
def get_options_headlesschrome(self, download_dir): from selenium import webdriver from selenium.webdriver.chrome.options import Options options = Options(); prefs = {"download.default_directory": download_dir,"download.directory_upgrade": True,"download.prompt_for_download": False,"safebrowsing.enabled": False,"safebrowsing.disable_download_protection": True,"page.setDownloadBehavior": {'behavior': 'allow', 'downloadPath': download_dir}} options.add_experimental_option("prefs",prefs); options.add_argument("--test-type"); options.add_argument("--headless"); options.add_argument("--no-sandbox"); options.add_argument("--disable-gpu"); options.add_argument("--incognito"); options.add_argument("--disable-extensions"); #options.add_argument("window-size=1920x1080"); return options;
_LOGGER.setLevel(logging.DEBUG)
logging.debug("test")

HTML_PARSER = 'html.parser'
ATTRIBUTION = 'Information provided by Aesop'
LOGIN_URL = 'https://sub.aesoponline.com/Substitute/Home'
LOGIN_TIMEOUT = 10
COOKIE_PATH = './aesop_cookies.pickle'
CACHE_PATH = './aesop_cache'
# Desktop Chrome user-agent string used for the headless browser.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
CHROME_WEBDRIVER_ARGS = [
    '--headless', '--user-agent={}'.format(USER_AGENT),
    '--disable-extensions', '--disable-gpu', '--no-sandbox'
]
# NOTE(review): machine-specific absolute path — should come from config/env.
CHROMEDRIVER_PATH = 'C:/Users/asaboo/Downloads/chromedriver_76/chromedriver'
FIREFOXOPTIONS = Options()
FIREFOXOPTIONS.add_argument("--headless")


class AESOPError(Exception):
    """AESOP error."""

    pass


def _save_cookies(requests_cookiejar, filename):
    """Save cookies to a file."""
    with open(filename, 'wb') as handle:
        pickle.dump(requests_cookiejar, handle)


def _load_cookies(filename):
def setUp(self): """Set up test driver""" opts = Options() opts.add_argument('-headless') self.driver = webdriver.Firefox(firefox_options=opts) self.driver.get(self.get_server_url())
def __init__(self): options = Options() options.add_argument('--headless') self.browser = webdriver.Firefox(options=options) self.browser.set_page_load_timeout(30) self.browser.set_window_size(500, 500) #设置浏览器窗口大小
def main():
    """Load a website in Firefox under a chosen DNS configuration and write
    the captured HAR file to stdout."""
    # Parse the command line arguments
    models = [
        'hash', 'rr', 'random', 'cloudflare', 'google', 'quad9', 'nextdns'
    ]
    parser = argparse.ArgumentParser()
    parser.add_argument('website')
    parser.add_argument('dns_type',
                        choices=['dns', 'doh', 'dot', 'dnscrypt-proxy_doh'])
    parser.add_argument('trr_resolver_ip')
    parser.add_argument('trr_resolver_uri')
    parser.add_argument('model', choices=models)
    parser.add_argument('--timeout', type=int, default=45)
    args = parser.parse_args()
    dnscrypt_config_file = '/dnscrypt-proxy/dnscrypt-proxy/dnscrypt-proxy-{0}.toml'.format(
        args.model)
    # Enable devtools in Firefox
    options = Options()
    options.headless = True
    options.add_argument('-devtools')
    # Enable the netmonitor toolbox in devtools so we can save HARs
    profile = webdriver.FirefoxProfile()
    profile.set_preference('devtools.toolbox.selectedTool', 'netmonitor')
    # Set up DNS configuration (keep a backup of the original resolv.conf)
    subprocess.run(
        ["sudo", "cp", "/etc/resolv.conf", "/etc/resolv.upstream.conf"])
    subprocess.run(["sudo", "cp", "resolv.conf", "/etc/resolv.conf"])
    if args.dns_type == 'dnscrypt-proxy_doh':
        # Start dnscrypt-proxy in the background and give it time to come up
        subprocess.run(
            "sudo /dnscrypt-proxy/dnscrypt-proxy/dnscrypt-proxy -config {0} &> /dev/null &"
            .format(dnscrypt_config_file),
            shell=True)
        subprocess.run(["sudo", "sleep", "5s"])
    # Configure the DNS settings in Firefox (network.trr.* preferences)
    if args.dns_type == 'dns' or args.dns_type == 'dot' or args.dns_type == 'dnscrypt-proxy_doh':
        options.set_preference('network.trr.mode', 0)
    elif args.dns_type == 'doh':
        options.set_preference('network.trr.mode', 3)
        options.set_preference('network.trr.request-timeout', 1500)
        options.set_preference('network.trr.max-fails', 5)
        trr_resolver_ip = args.trr_resolver_ip
        trr_resolver_uri = args.trr_resolver_uri
        if trr_resolver_ip:
            options.set_preference('network.trr.bootstrapAddress',
                                   trr_resolver_ip)
        if trr_resolver_uri:
            options.set_preference('network.trr.uri', trr_resolver_uri)
    # Launch Firefox and install our extension for getting HARs
    driver = webdriver.Firefox(options=options,
                               firefox_profile=profile,
                               firefox_binary="/opt/firefox/firefox-bin")
    driver.install_addon("/home/seluser/measure/harexporttrigger-0.6.2-fx.xpi")
    driver.set_page_load_timeout(args.timeout)
    # Make a page load
    started = datetime.now()
    driver.get(args.website)
    # Once the HAR is on disk in the container, write it to stdout so the host machine can get it
    har_file = "/home/seluser/measure/har.json"

    def har_file_ready():
        # The extension drops a "<har>.ready" sentinel file when finished.
        return os.path.exists(har_file + ".ready")

    # Poll for the HAR until it appears or the timeout budget is spent.
    while (datetime.now() - started).total_seconds() < args.timeout \
            and not har_file_ready():
        time.sleep(1)
    if har_file_ready():
        with open(har_file, 'rb') as f:
            sys.stdout.buffer.write(f.read())
    driver.quit()