def check_exists(self, val):
    """Return True when an element with id *val* is present, else False."""
    print("Testing Footer section.........Ok")
    try:
        webdriver.find_element_by_id(val)
    except NoSuchElementException:
        return False
    else:
        return True
def login(server_url, webdriver):
    """Sign into the site through the Persona dialog and wait until the
    profile name is clickable."""
    webdriver.get(server_url)
    # Open the login dialog and move focus into the Persona popup window.
    webdriver.find_element_by_id('login').click()
    webdriver.switch_to_window('__persona_dialog')
    # Type the email address and advance to the password step.
    email_input = WebDriverWait(webdriver, 5).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="authentication_email"]')))
    email_input.send_keys(config.PERSONA_USER)
    webdriver.find_element_by_xpath(
        '//*[@id="authentication_form"]/p[4]/button[1]').click()
    # Type the password once its field becomes clickable.
    password_input = WebDriverWait(webdriver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="authentication_password"]')))
    password_input.send_keys(config.PERSONA_PASS)
    # Submit the form and return to the previous window.
    webdriver.find_element_by_xpath(
        '//*[@id="authentication_form"]/p[4]/button[3]').click()
    webdriver.switch_to_window('')
    # Login is considered complete once the profile name is clickable.
    WebDriverWait(webdriver, 10).until(
        EC.element_to_be_clickable((By.ID, 'profile')))
def fb_login(webdriver, email, psswd):
    """Fill the Facebook sign-in form with the given credentials and submit."""
    webdriver.find_element_by_id('email').send_keys(email)
    webdriver.find_element_by_id('pass').send_keys(psswd)
    webdriver.find_element_by_id('u_0_n').click()
def main():
    """Log in, set the grid page size to 100, walk every result page and
    store all extracted items (rigs) in the database.

    Fix: the original loop mutated its own ``for i in range(...)`` index
    (``i += 1``) to get 1-based page numbers; use ``range(1, n + 1)``.
    """
    items = []
    # Log in
    webdriver.get(MAIN_PAGE_URL)
    webdriver.find_element_by_id(
        'ctl00_ContentPlaceHolder1_Username').send_keys(credentials['username'])
    webdriver.find_element_by_id(
        'ctl00_ContentPlaceHolder1_Password').send_keys(credentials['password'])
    webdriver.find_element_by_name('ctl00$ContentPlaceHolder1$ctl04').click()
    # Set items to show = 100
    webdriver.find_element_by_xpath(
        "//select[@name='ctl00$ContentPlaceHolder1$GridView1$ctl13$ctl11']/option[text()='100']"
    ).click()
    # Number of pager cells == number of result pages.
    page_number = len(
        webdriver.find_elements_by_xpath(
            "//tr[@class='grid-pager']//table//tr/td[not(@class)]"))
    page_href_script = "__doPostBack('ctl00$ContentPlaceHolder1$GridView1','Page$%s')"
    # Extract each page; page numbers are 1-based and page 1 is already shown.
    for page in range(1, page_number + 1):
        if page != 1:
            webdriver.execute_script(page_href_script % page)
            # The postback redirect is driven by JavaScript; wait it out.
            time.sleep(10)
        items += extract()
    # Insert into database all extracted items (rigs)
    insert_into_database(items)
def shot(cap, url, ads=None):
    """Open *url* on a remote browser, normalise known ad slots to their
    recorded sizes, and return ``(png_bytes, ads)`` where *ads* is the list
    reported by the page's ``pozice`` JS variable (with size/location filled).

    Fixes: the original assigned the result of ``webdriver.Remote`` to the
    name ``webdriver``, which makes it a local and raises UnboundLocalError
    on the RHS lookup; the screenshot was also read in text mode and the
    file handle leaked.
    """
    driver = webdriver.Remote(hubUrl, desired_capabilities=cap, proxy=proxy)
    driver.set_window_size(1024, 800)
    driver.get(url)
    if ads:
        # Force each known ad slot to its recorded size so layouts match.
        sizeJs = (
            'elm=document.getElementById("%s");'
            'elm.style.display="block";elm.style.width="%s px";'
            'elm.style.height="%s px";elm.style.overflow="hidden";'
        )
        for ad in ads:
            elm = driver.find_element_by_id(ad["id"])
            if ad["size"] != (elm.size["width"], elm.size["height"]):
                driver.execute_script(
                    sizeJs % (ad["id"], ad["size"][0], ad["size"][1]))
    # The page exposes its ad descriptors in the global JS variable 'pozice'.
    try:
        ads = driver.execute_script("return pozice;")
    except selenium.common.exceptions.WebDriverException:
        ads = []
    for ad in ads:
        elm = driver.find_element_by_id(ad["id"])
        ad["size"] = elm.size["width"], elm.size["height"]
        ad["location"] = elm.location["x"], elm.location["y"]
    # Strip debug overlays before the screenshot.
    for dId in debugIds:
        driver.execute_script(removeDebug % dId)
    _, filename = tempfile.mkstemp()
    driver.save_screenshot(filename)
    # PNG data: read as binary and close the handle deterministically.
    with open(filename, "rb") as fh:
        data = fh.read()
    os.remove(filename)
    return data, ads
def login(webdriver):
    """Open the login page, enter the iframe form, and submit credentials."""
    webdriver.get(url)
    webdriver.implicitly_wait(10)
    frame = webdriver.find_element_by_tag_name('iframe')
    webdriver.switch_to.frame(frame)
    for field_id, value in (('username', USERNAME), ('password', PASSWORD)):
        webdriver.find_element_by_id(field_id).send_keys(value)
    webdriver.find_element_by_xpath("//input[@type='image']").click()
    webdriver.implicitly_wait(5)
def login_by_form(self, usr, pswd, webdriver):
    """Fill the auth form with *usr*/*pswd* and click its submit button."""
    webdriver.find_element_by_id("id_username").send_keys(usr)
    webdriver.find_element_by_id("id_password").send_keys(pswd)
    webdriver.find_element_by_css_selector("input[type='submit']").click()
def shot(webdriver, browser, testId, buildId):
    """Normalise known ad slots, screenshot the page, and upload the PNG
    plus the pickled ad geometry over XML-RPC.

    If the server already has an "original" ads.pickle for this test, the
    upload is stored under *buildId*; otherwise it becomes the new original.

    Fix: the original loaded ``ret["data"]`` when the pickle did NOT exist
    (inverted ``originExists`` check) — the payload is only present on a
    200 response. Screenshot is now read in binary mode with the handle
    closed deterministically.
    """
    proxy = ServerProxy(uploadUrl)
    webdriver.set_window_size(1024, 800)
    ret = proxy.remoteFile.read(
        libs.path.uploadPath(testId, browser, "original", "ads.pickle"))
    originExists = ret["status"] == 200
    if originExists:
        # Resize each known ad slot to its originally recorded size so the
        # build screenshot is comparable with the original.
        ads1 = loads(ret["data"].data)
        if ads1:
            sizeJs = (
                'elm=document.getElementById("%s");'
                'elm.style.display="block";'
                'elm.style.width="%s px";elm.style.height="%s px";'
                'elm.style.overflow="hidden";'
            )
            for ad in ads1:
                elm = webdriver.find_element_by_id(ad["id"])
                if ad["size"] != (elm.size["width"], elm.size["height"]):
                    webdriver.execute_script(
                        sizeJs % (ad["id"], ad["size"][0], ad["size"][1]))
    # The page exposes its ad descriptors in the global JS variable 'pozice'.
    try:
        ads2 = webdriver.execute_script("return pozice;")
    except selenium.common.exceptions.WebDriverException:
        ads2 = []
    for ad in ads2:
        elm = webdriver.find_element_by_id(ad["id"])
        ad["size"] = elm.size["width"], elm.size["height"]
        ad["location"] = elm.location["x"], elm.location["y"]
    # Strip debug overlays before the screenshot.
    for dId in debugIds:
        webdriver.execute_script(removeDebug % dId)
    _, filename = tempfile.mkstemp()
    webdriver.save_screenshot(filename)
    with open(filename, "rb") as fh:
        data = fh.read()
    os.remove(filename)
    adsPickled = dumps(ads2)
    if originExists:
        proxy.remoteFile.write(
            libs.path.uploadPath(testId, browser, buildId, "build.png"),
            Binary(data), False)
        proxy.remoteFile.write(
            libs.path.uploadPath(testId, browser, buildId, "ads.pickle"),
            Binary(adsPickled), False)
    else:
        proxy.remoteFile.write(
            libs.path.uploadPath(testId, browser, "original", "original.png"),
            Binary(data), False)
        proxy.remoteFile.write(
            libs.path.uploadPath(testId, browser, "original", "ads.pickle"),
            Binary(adsPickled), False)
def login_by_form(self, usr, pswd, webdriver):
    """Fill the auth form, then submit through the clickable-wait helper."""
    webdriver.find_element_by_id("id_username").send_keys(usr)
    webdriver.find_element_by_id("id_password").send_keys(pswd)
    self.wait_for_element_to_be_clickable_with_css_selector_click(
        "input[type='submit']")
def create_comment(webdriver, content):
    """Prerequisite: must be viewing an item with a comment box."""
    box = WebDriverWait(webdriver, 5).until(
        EC.element_to_be_clickable((By.ID, 'reply-box-textarea')))
    box.send_keys(content)
    webdriver.find_element_by_id('submit').click()
def create_comment_reply(webdriver, content, comment_id):
    """Prerequisite: must be viewing an item with the comment with
    comment_id on the page."""
    def clickable(element_id):
        # Wait up to 5s for the element, then hand it back.
        return WebDriverWait(webdriver, 5).until(
            EC.element_to_be_clickable((By.ID, element_id)))

    clickable('comment' + comment_id + 'reply').click()
    clickable('reply-box-textarea').send_keys(content)
    webdriver.find_element_by_id('submit').click()
def test_baidu(self):
    """Search Youdao for 'webdriver' and check the result page's title."""
    driver = self.driver
    driver.get(self.base_url + "/")
    search_box = driver.find_element_by_id('translateContent')
    search_box.clear()
    search_box.send_keys("webdriver")
    driver.find_element_by_xpath('//*[@id="form"]/button').click()
    time.sleep(2)
    self.assertEqual(
        driver.title,
        "【webdriver】什么意思_英语webdriver的翻译_音标_读音_用法_例句_在线翻译_有道词典")
def _perform_login(webdriver=None):
    """Open the login page and submit the module-level USERNAME/PASSWORD."""
    webdriver.get(f"{ORIGIN_URL}/accounts/login/")
    webdriver.find_element_by_id("username").send_keys(USERNAME)
    webdriver.find_element_by_id("password").send_keys(PASSWORD)
    webdriver.find_element_by_id("login").click()
def scrape_olr(neighborhood_code, webdriver):
    """Log into the listing site, drill into one hard-coded customer's saved
    condo search, and feed each listing's HTML to scrape_olr_condo_listings.

    NOTE(review): *neighborhood_code* is never used in the visible body, and
    the customer (788817) and search (956274) ids are hard-coded — confirm
    whether they should derive from the parameter.
    """
    # Credentials come from the environment; fail fast if they are missing.
    username = os.getenv('JRX_USER')
    password = os.getenv('JRX_PASSWORD')
    if not username or not password:
        raise Exception('The environment variables JRX_USER and JRX_PASSWORD must be set.')
    username_input = webdriver.find_element_by_id('ctl00_ContentPlaceHolder1_txtUserName')
    password_input = webdriver.find_element_by_id('ctl00_ContentPlaceHolder1_txtPassword')
    username_input.send_keys(username)
    time.sleep(1)
    password_input.send_keys(password)
    login_button = webdriver.find_element_by_id('ctl00_ContentPlaceHolder1_Loginexceed')
    time.sleep(1)
    login_button.click()
    time.sleep(1)
    # Navigate to the customers view; clicks go through execute_script,
    # presumably to bypass overlay/interactability issues — TODO confirm.
    customer_link = webdriver.find_element_by_xpath("//a[@data-label='View Customers']")
    webdriver.execute_script("arguments[0].click();", customer_link)
    time.sleep(2)
    customer_grid = webdriver.find_element_by_xpath("//table[@id='ctl00_ctl00_MyCustContent_MyCustContent_gvParentAjax']")
    jr_tblentry = customer_grid.find_element_by_xpath("//tr[@id='tr_788817']")
    target_div = jr_tblentry.find_element_by_xpath("//div[@id='divDetail_788817']")
    # click on expand button
    expand_btn = customer_grid.find_element_by_xpath("//input[@id='imgExpColl_788817']")
    webdriver.execute_script("arguments[0].click();", expand_btn)
    time.sleep(1)
    #nested_tables = target_div.find_elements_by_tag_name('table')
    #print(dir(nested_tables))
    #print('### %d nested tables found in target div.' % len(nested_tables))
    #print('### found target table.')
    # Open the saved "AdvancedSale" condo search for this customer.
    search_link_condos = target_div.find_element_by_xpath("//a[@href='MyCustomerImageHndlr.aspx?Type=Runsearch&SearchID=956274&RunSearchType=AdvancedSale&cid=788817&From=View']")
    webdriver.execute_script("arguments[0].click();", search_link_condos)
    time.sleep(1)
    # Each result card is one 'apt_details_area' block.
    top_div = webdriver.find_element_by_xpath("//div[@class='container_1130 search-ui']")
    detail_areas = top_div.find_elements_by_xpath("//div[@class='apt_details_area clearfix']")
    print('### %s listings found.'
          % len(detail_areas))
    for da in detail_areas:
        #apt_detail = da.find_element_by_xpath("//div[@class='apt_details_left']")
        # Parse each listing card's inner HTML with BeautifulSoup.
        markup = da.get_attribute('innerHTML')
        soup_parser = BeautifulSoup(markup, 'html.parser')
        scrape_olr_condo_listings(soup_parser, markup)
        #print(markup)
        print('#################################################')
def initNJUJw(user, pwd):
    """Submit the login form and report whether login succeeded, judged by
    the presence of the 'UserInfo' element on the resulting page.

    Fixes: the original used Python-2-only ``except Exception, e`` syntax,
    re-raised the exception instead of reporting failure, and returned
    False even when the element was found.
    """
    userInput = webdriver.find_element_by_name('userName')
    pwdInput = webdriver.find_element_by_name('password')
    userInput.send_keys(user)
    pwdInput.send_keys(pwd)
    sub = webdriver.find_element_by_class_name('Btn')
    sub.click()
    check = 'UserInfo'
    try:
        webdriver.find_element_by_id(check)
    except Exception:
        # 'UserInfo' not present -> login failed.
        return False
    return True
class Selectors():
    # NOTE(review): this class body executes once, at class-definition time,
    # and publishes the located form-field WebElements as *module-level
    # globals* (via the `global` statements) rather than class attributes —
    # confirm that is intentional. It requires a module-level `webdriver`
    # with the form already loaded at import time.
    # def sel(self):
    # driver = webdriver.Chrome()
    global project_code
    project_code = webdriver.find_element_by_id("record.code")
    global note
    note = webdriver.find_element_by_id("record.note")
    global address
    address = webdriver.find_element_by_id("record.property.address2")
    global client
    client = webdriver.find_element_by_id("record.property.clientName")
    global contact
    contact = webdriver.find_element_by_id("record.propertyContactDto.name")
def visiblePainted(self):
    """Locate the 'swiffycontainer' element after a fixed settle delay.

    The commented-out lines are earlier experiments that injected a test
    <div> into the container and clicked elements inside it.
    """
    #self.driver.execute_script("var div_element = document.createElement('div');div_element.setAttribute('style', 'position: absolute; left: 100px; top: 380px; width:10px; height:10px; background-color:white;');div_element.setAttribute('id', 'test');var parent_object = document.getElementById('swiffycontainer');parent_object.appendChild(div_element);")
    # Fixed 10s pause before touching the page.
    time.sleep(10)
    #self.click("div","id","test",1)
    #self.click("div","xpath",'//*[@id="swiffycontainer"]/div[1]/svg/g/g/g/g/g[1]/g[1]/g/path',1)
    webdriver = self.driver
    # NOTE(review): 'target' is unused in the visible code — confirm the
    # method is meant to do more than just locate the element.
    target = webdriver.find_element_by_id("swiffycontainer")
def access_Trade(url):
    """Open the trade page at *url* and stage a SELL market order for UGAZ."""
    driver = webdriver.Chrome(
        executable_path="/Users/charlesraymond/Documents/chromedriver")
    driver.get(url)
    sleep(3)
    driver.find_element_by_link_text("Trade").click()
    driver.find_element_by_id("symbol").send_keys("UGAZ")
    driver.find_element_by_link_text("Stocks & ETFs").click()
    sleep(3)
    # Pick the order action and type from the two dropdowns.
    Select(driver.find_element_by_id('action')).select_by_value('SELL')
    Select(driver.find_element_by_id('orderType')).select_by_value('MARKET')
def getNimDoc(search_query, number):
    """Search the docs site for *search_query* and print up to *number*
    results as numbered title/url pairs.

    Fix: the original built ``wait = WebDriverWait(webdriver, 2)`` and never
    used it (removed), and kept a manual counter alongside the loop
    (replaced with enumerate).
    """
    webdriver.get(url)
    search = webdriver.find_element_by_id("searchInput")
    search.send_keys(search_query + Keys.RETURN)
    sleep(2)  # let the result list render before reading the page source
    soup = BeautifulSoup(webdriver.page_source, 'html.parser')
    # Results live under: div.nine.columns > div.search_results > li
    column = soup.find_all('div', class_='nine columns')[0]
    entries = column.find_all('div', class_='search_results')[0].find_all('li')
    for n, entry in enumerate(entries, start=1):
        if n > number:
            break
        print(n, "Title = ", entry.a.text, '\n',
              "Url = ", entry.find('a').get('href'), '\n')
        print(
            "-------------------------------------------------------------------------------------------------"
        )
def main():
    """Fetch the Ibovespa theoretical-portfolio page, print its validity
    date, then print the second column of every portfolio row."""
    driver = get_webdriver()
    header = driver.find_element_by_id(
        "ctl00_contentPlaceHolderConteudo_lblTitulo").text
    # The label reads "...válida para <dd/mm/yy>"; keep only the date part.
    header = header.split('Carteira Teórica do Ibovespa válida para ')[1]
    print(datetime.strptime(header, '%d/%m/%y').date())
    table_content = driver.find_element_by_id(
        'ctl00_contentPlaceHolderConteudo_grdResumoCarteiraTeorica_ctl00'
    ).get_attribute('innerHTML')
    # Close the browser before parsing.
    driver.quit()
    for row in fetch_portfolio_composition(table_content):
        print(row[1])
def load_data(webdriver):
    """Visit every URL in URLS, switch the site region to QC, and hand each
    page's HTML to extract_and_load_all_data via BeautifulSoup."""
    for url in URLS:
        webdriver.get(url)
        print(url)
        # Select the QC region radio button, then apply the region setting.
        webdriver.find_element_by_css_selector(
            "input[type='radio'][name='region'][value='QC']").click()
        webdriver.find_element_by_id("language-region-set").click()
        # Parse the rendered page source and pass it downstream.
        page_html = webdriver.page_source
        extract_and_load_all_data(BeautifulSoup(page_html, "html.parser"))
def getmail():
    """Open temp-mail.org, read the generated address, and use it to create
    an Instagram account.

    Fix: the original assigned ``webdriver.Chrome(...)`` to the name
    ``webdriver``, making it a local variable and raising UnboundLocalError
    on the right-hand-side lookup; the local is renamed to ``driver``.
    """
    url2 = "https://temp-mail.org/en/"
    # Change this to your own chromedriver path!
    chromedriver_path = r'C:\Users\Orkideh\Downloads\chromedriver_win32/chromedriver.exe'
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    sleep(2)
    driver.get(url2)
    sleep(3)
    # The site fills the 'mail' field with the generated throwaway address.
    email_temp = driver.find_element_by_id('mail')
    final_mail = email_temp.text
    create_insta_ac(final_mail)
def imgGetURL(webdriver):
    """Return the download URL (src attribute) of the comic page image.

    webdriver: Selenium's webdriver
    """
    return webdriver.find_element_by_id("comic_page").get_attribute("src")
def form_data(webdriver):
    """Fill the four registration fields and submit via the xpath button."""
    fields = (
        (webdriver.find_element_by_tag_name, "input", "Ivan"),
        (webdriver.find_element_by_name, "last_name", "Petrov"),
        (webdriver.find_element_by_class_name, "city", "Smolensk"),
        (webdriver.find_element_by_id, "country", "Russia"),
    )
    for locate, selector, value in fields:
        locate(selector).send_keys(value)
    webdriver.find_element_by_xpath(xpath).click()
def create_microcosm(server_url, webdriver, title, description):
    """Prerequisite: must be viewing a site and have create permission."""
    def clickable(element_id):
        # Wait up to 5s for the element to be clickable, then return it.
        return WebDriverWait(webdriver, 5).until(
            EC.element_to_be_clickable((By.ID, element_id)))

    webdriver.get(server_url)
    clickable('create_microcosm').click()
    clickable('title').send_keys(title)
    clickable('description').send_keys(description)
    webdriver.find_element_by_id('submit').click()
    # The new microcosm page is ready once its title is clickable again.
    clickable('title')
def login_page(cls, webdriver):
    '''Logins into the fake account'''
    print(delay)
    # Make the <iframe> login form usable.
    WebDriverWait(webdriver, delay).until(
        EC.frame_to_be_available_and_switch_to_it((By.ID, "alibaba-login-box")))
    # NOTE: the bare WebDriverWait(...) calls below construct a waiter
    # without .until(); kept as-is to match the original pacing behaviour.
    WebDriverWait(webdriver, delay)
    # Enter the fake account's username.
    webdriver.find_element_by_xpath('//*[@id="fm-login-id"]').send_keys(
        "*****@*****.**")
    WebDriverWait(webdriver, delay)
    # Enter the fake account's password.
    webdriver.find_element_by_id("fm-login-password").send_keys("Cosmos000")
    WebDriverWait(webdriver, delay)
    # Click the log-in button.
    webdriver.find_element_by_xpath(
        '//*[@id="login-form"]/div[5]/button').click()
    WebDriverWait(webdriver, delay)
def auth_with_data(self, webdriver, epicgames_login, epicgames_pswd):
    """Log into the Epic Games store with the given credentials.

    Returns 'need_reset' when the page body matches an xxx-xxx-xxx pattern
    after login, else 'ok'.

    Fixes: the regex is now a raw string (``'\\w'`` in a plain string emits
    a DeprecationWarning); the unused ``wait_for_element`` bindings are
    dropped.
    """
    webdriver.get(
        'https://www.epicgames.com/id/login?redirectUrl=https%3A%2F%2Fwww.epicgames.com%2Fstore%2Fru%2F&noHostRedirect=true'
    )
    WebDriverWait(webdriver, TIMEOUT).until(
        EC.element_to_be_clickable((By.ID, 'usernameOrEmail')))
    webdriver.find_element_by_id('usernameOrEmail').click()
    webdriver.find_element_by_id('usernameOrEmail').send_keys(epicgames_login)
    webdriver.find_element_by_id('password').send_keys(epicgames_pswd)
    WebDriverWait(webdriver, TIMEOUT).until(
        EC.element_to_be_clickable((By.ID, 'login')))
    webdriver.find_element_by_id('login').send_keys(Keys.RETURN)
    time.sleep(30)  # give the post-login page time to settle
    if re.findall(r'(\w+)-(\w+)-(\w+)',
                  webdriver.find_element_by_tag_name('body').text):
        return 'need_reset'
    return 'ok'
def request_to_reset_pswd(self, webdriver):
    """Open the forgot-password page and request a reset mail for the
    account's yandex address."""
    webdriver.get('https://www.epicgames.com/id/login/forgot-password')
    email_element = WebDriverWait(webdriver, TIMEOUT).until(
        EC.element_to_be_clickable((By.ID, 'email')))
    print(email_element)
    webdriver.find_element_by_id('email').click()
    # YANDEXDNS CHANGE IN FUTURE
    webdriver.find_element_by_id('email').send_keys(
        self.epicgames_login + '@yandex.ru')
    WebDriverWait(webdriver, TIMEOUT).until(
        EC.element_to_be_clickable((By.ID, 'send')))
    webdriver.find_element_by_id('send').send_keys(Keys.RETURN)
def get_table_content(tableId, queryContent):
    """Scan the table with id *tableId* row by row and print the 1-based
    (row, column) coordinates of every cell equal to *queryContent*.

    Fix: the original located the table element and then passed it to
    ``webdriver.find_element(*table_loc)``, which tries to unpack a
    WebElement as a (by, value) locator and fails; the rows are now
    searched from the located table directly.
    """
    arr = []
    table = webdriver.find_element_by_id(tableId)
    # Each <tr>'s .text is the whole row; columns are space-separated.
    table_tr_list = table.find_elements(By.TAG_NAME, "tr")
    for tr in table_tr_list:
        row = (tr.text).split(" ")  # one 1-D list per row
        print(tr.text)
        arr.append(row)  # build a 2-D list of the table's cells
    # Walk the 2-D list to locate the queried value.
    for i in range(len(arr)):
        for j in range(len(arr[i])):
            if queryContent == arr[i][j]:
                print("%r坐标为(%r,%r)" % (queryContent, i + 1, j + 1))
def navigateToCourse(dbname,dbpwd):
    # Python 2 code (uses a `print` statement below).
    # Walk the term/grade/academy/speciality dropdowns, run a search for
    # each combination, and persist the results via getCoursesList.
    # Open the teaching-info menu and follow its first link.
    li=webdriver.find_element_by_id('teachinginfo')
    courses=li.find_element_by_tag_name('a')
    courses.click()
    # The second link under 'Function' leads to the course-search page.
    Function=webdriver.find_element_by_id('Function')
    funcLinks=Function.find_elements_by_tag_name('a')
    funcLinks[1].click()
    btSearch=webdriver.find_element_by_id('btSearch')
    db,cur=dbInit(dbname,dbpwd)
    # Collect the <option> lists of the three top-level dropdowns.
    termList=webdriver.find_element_by_id('termList')
    termList=termList.find_elements_by_tag_name("option")
    gradeList=webdriver.find_element_by_id('gradeList')
    gradeList=gradeList.find_elements_by_tag_name("option")
    academySelect=webdriver.find_element_by_id('academySelect')
    academySelect=academySelect.find_elements_by_tag_name("option")
    i=0
    j=0
    # Iterate terms 1-2 and grades 1-4 (index 0 is presumably a
    # placeholder option — TODO confirm).
    while i<2:
        i=i+1
        termList[i].click()
        j=0
        while j<4:
            j=j+1
            gradeList[j].click()
            k=1
            while k<len(academySelect):
                academySelect[k].click()
                academy=academySelect[k].text
                # The speciality list is reloaded per academy, so it must
                # be re-located on every iteration.
                specialitySelect=webdriver.find_element_by_id('specialitySelect')
                specialitySelect=specialitySelect.find_elements_by_tag_name("option")
                ki=1;
                while ki<len(specialitySelect):
                    specialitySelect[ki].click()
                    special=specialitySelect[ki].text
                    btSearch.click()
                    # Term code: first term -> 'b', second -> 'a'.
                    if i == 1:
                        term='b'
                    else :
                        term='a'
                    print term,gradeList[j].text,academy,special
                    getCoursesList(db,cur,academy,special,term)
                    ki=ki+1
                k=k+1
    dbClose(db,cur)
def find(webdriver, by, css_selector_val):
    '''
    Wrapper function of selenium python to find an element using a locator
    name and its value.

    Arguments
    ---------
    webdriver - object of selenium.webdriver.chrome.webdriver.WebDriver.
    by - element locator name. Expected values: id, name, xpath, link_text,
        partial_link_text, tag_name, class_name, css_selector. Any other
        value returns None.
    css_selector_val - value for the element locator, e.g. 'contact' to find
        an element with class_name=contact.

    Return
    ---------
    WebElement - if the value of arg 'by' is an expected value
    or None - if the value of arg 'by' is an unexpected value
    '''
    supported = {
        'id', 'name', 'xpath', 'link_text', 'partial_link_text',
        'tag_name', 'class_name', 'css_selector',
    }
    if by not in supported:
        return None
    # Each supported locator maps 1:1 onto a find_element_by_<by> method.
    return getattr(webdriver, 'find_element_by_' + by)(css_selector_val)
import time
import math

link = "http://suninjuly.github.io/explicit_wait2.html"


def calc(x):
    # The page's math challenge: log(|12*sin(x)|), returned as a string.
    return str(math.log(abs(12 * math.sin(int(x)))))


try:
    webdriver = webdriver.Chrome()
    webdriver.get(link)
    # wait 5 sec for each element
    webdriver.implicitly_wait(5)
    # Explicit wait (WebDriverWait and expected_conditions): proceed only
    # once the price element reads "$100".
    price = WebDriverWait(webdriver, 12).until(
        EC.text_to_be_present_in_element((By.ID, "price"), "$100"))
    book = webdriver.find_element_by_id("book")
    book.click()
    # Read the challenge value, compute the answer, submit it.
    x = webdriver.find_element_by_id("input_value").text
    y = calc(x)
    input_field = webdriver.find_element_by_id('answer').send_keys(y)
    submit = webdriver.find_element_by_id('solve').click()
finally:
    # Always close the browser, even when a wait times out.
    time.sleep(5)
    webdriver.quit()
from selenium.webdriver.common.by import By #browser = Browser() fp = webdriver.FirefoxProfile() fp.set_preference("browser.download.folderList",2) fp.set_preference("browser.download.dir", "/home/dhl/download") fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/octet-stream") webdriver = webdriver.Firefox(firefox_profile=fp) webdriver.get('https://nolb.dhl.de/nextt-online-business/jsp/login.do') #Loging in webdriver.find_element_by_id('login').send_keys('user') webdriver.find_element_by_id('password').send_keys('password') webdriver.find_element_by_name('doLogin').click() f = open("/home/dhl/shipment_numbers.csv") wait = WebDriverWait(webdriver, 60 * 60) for line in f: #Searching for shipment code webdriver.find_element_by_id('shipmentCode').send_keys(line) webdriver.find_element_by_id('timeIntervall').send_keys('12') webdriver.find_element_by_name('search_ta').click() #Downloading pdf try: actionSelection = wait.until(EC.element_to_be_clickable((By.ID,'pageActionSelect')))
from selenium.webdriver.common.keys import Keys
from time import sleep, strftime
from random import randint
import pandas as pd

# Change this to your own chromedriver path!
chromedriver_path = 'C:\Program Files\chromedriver.exe'
webdriver = webdriver.Chrome(executable_path=chromedriver_path)
sleep(2)
webdriver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(3)
# Switch to the "Log in with Facebook" flow via its (brittle) absolute xpath.
facebook = webdriver.find_element_by_xpath(
    "/html/body/div[1]/section/main/div/div/div[1]/div/form/div/div[5]/button")
facebook.click()
# Fill the Facebook credentials (placeholders — replace with real values).
username = webdriver.find_element_by_id('email')
username.send_keys('email_facebook')
password = webdriver.find_element_by_id('pass')
password.send_keys('password_here')
button_login = webdriver.find_element_by_id('loginbutton')
button_login.click()
# print(button_login)
sleep(10)
# Dismiss the post-login popup.
Not_Now = webdriver.find_element_by_xpath(
    "/html/body/div[4]/div/div/div/div[3]/button[2]")
Not_Now.click()
# notnow = webdriver.find_element_by_css_selector('body > div:nth-child(13) > div > div > div > div.mt3GC > button.aOOlW.HoLwm')
# notnow.click()
# comment these last 2 lines out, if you don't get a pop up asking about notifications
from selenium import webdriver

# Start Firefox and open Baidu.
webdriver = webdriver.Firefox()
webdriver.implicitly_wait(10)
webdriver.maximize_window()
webdriver.get("http://www.baidu.com")

# Submit a search for the keyword.
search_box = webdriver.find_element_by_id("kw")
search_box.clear()
search_box.send_keys("山东")
search_box.submit()

# Print every result abstract on the first page.
products = webdriver.find_elements_by_xpath("//div[contains(@class, 'c-abstract')]")
print("Found " + str(len(products)) + "products:")
for product in products:
    print(product.text)
webdriver.close()
def extract():
    """Scrape the rigs grid: collect one dict per row from the main table,
    then visit each row's detail page for sub_section/updated fields.

    Returns a list of dicts, one per rig row. Relies on module-level
    ``webdriver``, ``wait``, and helpers.
    """
    # Get main table and select all urls to items pages
    table_content = webdriver.find_element_by_id(
        'ctl00_ContentPlaceHolder1_GridView1').get_attribute('innerHTML')
    document = html.fromstring(table_content)
    # Data rows exclude the header (first) and pager (last) rows.
    count = len(
        document.xpath(
            './/tr[position()>1 and position()<last()]/td[position()=1]'))
    # One __doPostBack 'Select$i' script per row opens that row's detail page.
    links = [
        "__doPostBack('ctl00$ContentPlaceHolder1$GridView1','Select$%s')" % i
        for i in range(count)
    ]
    trs = document.xpath('.//tr[position()>1 and position()<last()]')
    # Adding information from the main page
    items = []
    for tr in trs:
        d = {}
        # Columns after the first map positionally onto the fields below.
        tds = tr.xpath('.//td[position()>1]//text()')
        d['contractor'] = tds[0]
        d['number'] = tds[1]
        d['operator'] = tds[2]
        d['well_num_name'] = tds[3]
        d['basin'] = tds[4]
        d['county'] = tds[5]
        d['state'] = tds[6]
        d['section'] = tds[7]
        d['township'] = tds[8]
        d['range'] = tds[9]
        d['pd'] = tds[10]
        d['status'] = tds[11]
        d['notes'] = tds[12]
        d['date_scraped'] = datetime.now()
        # Split "well_num_name" into name and number: the name is every
        # leading word without a '-'; the number starts at the first word
        # containing one.
        well_num_name = d['well_num_name'].split(' ')
        well_name = ''
        well_num = ''
        i = 0
        for ind, part in enumerate(well_num_name):
            if '-' not in part:
                well_name += part + ' '
            else:
                i = ind
                break
        well_num = ' '.join(well_num_name[i:-1]) + ' ' + well_num_name[-1]
        d['well_name'] = well_name.strip()
        d['well_num'] = well_num.strip()
        items.append(d)
    # Iterating thru items pages
    for i, link in enumerate(links):
        webdriver.execute_script(link)
        # Redirecting is processing by javascript, so we have to wait while page is downloading
        wait.until(
            expected_conditions.presence_of_element_located((
                By.XPATH,
                "//div[@id='ctl00_ContentPlaceHolder1_FormView1_PanelForEditTemplate']"
            )))
        html_content = webdriver.find_element_by_id(
            'pubnavcontent').get_attribute('innerHTML')
        document = html.fromstring(html_content)
        spans = document.xpath("//span[@class='roform']//text()")
        div = list(
            filter(None,
                   document.xpath("//div[@class='previewborder']//text()")))
        items[i]['sub_section'], items[i]['updated'] = get_additional_info(
            spans, div)
        webdriver.back()
        # Randomised delay between detail-page visits.
        time.sleep(random.randint(2, 15))
    return items
from selenium import webdriver
import time

webdriver = webdriver.Chrome()
# Open a local practice page (the path contains URL-encoded Chinese folder names).
webdriver.get(
    r"file:///C:/Users/baba123/Desktop/%E6%96%B0%E5%BB%BA%E6%96%87%E4%BB%B6%E5%A4%B9%20(2)/%E7%BB%83%E4%B9%A0%E7%9A%84html/main.html"
)
webdriver.maximize_window()
# Switch into the frame named "frame" and type into its input box.
webdriver.switch_to.frame("frame")
webdriver.find_element_by_id("input1").send_keys("我是你爸爸")
def parse_url(start_url_ext, idx, webdriver, location=False, _filter=False):
    """Parse a Trip Advisor hotel page and scrape review information:
    rating, review, and review title. Optional to scrape location details.

    Args:
        start_url_ext (str): Trip Advisor hotel page to parse
        idx (int): current page index, 0 through n, used for print statement
        webdriver (Selenium WebDriver): browser tool allowing for interaction
            with website
        location (bool, optional): Option to return location details.
            Defaults to False.
        _filter (bool, optional): when True, activate the 1-3 star rating
            filters before scraping so only low reviews are collected.
            Defaults to False.

    Returns:
        page_reviews: list of strings
        page_ratings: list of strings
        page_titles: list of integers, 0 - 5
        if location = True:
            location (tuple): (full hotel name, city, state)
    """
    domain = "https://www.tripadvisor.com"
    # Define waits, moved from stale element 'try' 1/18pm
    ignored_exceptions = (NoSuchElementException,
                          StaleElementReferenceException, TimeoutException)
    wait = WebDriverWait(webdriver, 10, ignored_exceptions=ignored_exceptions)
    # Catch for webdriver time out — a timed-out load still leaves a usable page.
    try:
        webdriver.get(domain + start_url_ext)  # 1/18 reduced from 5 to 3
    except TimeoutException:
        pass
    if _filter == True:
        # ACTIVATE low filters to scrape only low reviews (3, 2, 1 stars).
        # Clicks go through execute_script; a broad except keeps scraping
        # even if a filter control is missing.
        try:
            for f in [3, 2, 1]:
                # level = f"ReviewRatingFilter_{f}"
                # webdriver.find_element_by_id(level).click()
                level = f"ReviewRatingFilter_{f}"
                wait.until(EC.element_to_be_clickable((By.ID, level)))
                webdriver.execute_script(
                    "arguments[0].click();",
                    (webdriver.find_element_by_id(level)))
                print(f"filter{f}")
        except:
            pass
    # Catch for webdriver stale element
    try:
        # ignored_exceptions = (NoSuchElementException, StaleElementReferenceException, TimeoutException)
        # wait = WebDriverWait(webdriver, 10, ignored_exceptions=ignored_exceptions)
        wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "_3maEfNCR")))
    except TimeoutException:
        pass
    # Find 'read more' buttons
    all_more_buttons = webdriver.find_elements_by_class_name("_3maEfNCR")
    # If 'read more' available, activate to expand text, only need to click one
    if all_more_buttons:
        try:
            all_more_buttons[0].click()
            print('click')
        except StaleElementReferenceException:
            pass
    # Set soup
    page_source = webdriver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    # Scrape the ratings data
    page_reviews, page_ratings, page_titles = retrieve_reviews_ratings(
        soup, idx)
    # If location data requested, gather it
    if location == False:
        return page_reviews, page_ratings, page_titles
    else:
        location = retrieve_location(soup)
        return page_reviews, page_ratings, page_titles, location
def openBrowser(firstParam, secondParam):
    """Open the search page at *firstParam* and submit *secondParam* as the
    query via the 'kw' input and 'su' button."""
    webdriver.get(url=firstParam + "/")
    search_input = webdriver.find_element_by_id("kw")
    search_input.click()
    search_input.clear()
    search_input.send_keys(secondParam)
    webdriver.find_element_by_id("su").click()
# -*-coding:utf-8-*- from selenium import webdriver from selenium.webdriver.common.keys import Keys # 创建PhantomJS浏览器对象 webdriver = webdriver.PhantomJS( executable_path=r'D:\phantomjs-2.1.1-windows\bin\phantomjs.exe') webdriver.get('https://www.douban.com/') # 截屏 webdriver.save_screenshot('douban.png') captcha = raw_input('验证码:\n') # 输入账号 webdriver.find_element_by_id('form_email').send_keys('13960942437') # 输入密码 webdriver.find_element_by_id('form_password').send_keys('wo951127') # 输入验证码 webdriver.find_element_by_id('captcha_field').send_keys(captcha) #点击登陆 webdriver.find_element_by_class_name('bn-submit').click() webdriver.save_screenshot('login.png')