def Cfci():
    """Search the Coupa invoices page for every entry in ``CoupaInv``.

    For each entry, ``coo[0]`` is typed into the invoice search box and the
    search is submitted.  Entries with no match are appended to ``COUPRIC``;
    for the others the result rows are handed to ``getData`` and then to
    ``Multi_Invoice_Data`` or ``Single_Invoice_Date`` depending on
    ``len(newData)``.

    If ``te`` is raised (presumably Selenium's TimeoutException -- confirm
    against the file's imports) a message is printed and the browser is
    closed.  Other Selenium errors are not handled here.
    """
    search_box_xpath = '//*[@id="sf_invoice_header"]'
    D.get('https://pearson.coupahost.com/invoices')
    timeout = 10
    try:
        wdw(D, timeout).until(
            ec.visibility_of_element_located((By.XPATH, '//*[@id="pageHeader"]')))
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # Selenium 4 removed; the By form works on Selenium 2, 3 and 4.
        D.find_element(By.XPATH, '//*[@id="invoice_header_filter"]/option[2]').click()
        for co, coo in enumerate(CoupaInv):
            D.find_element(By.XPATH, search_box_xpath).send_keys(coo[0])
            D.find_element(
                By.XPATH,
                '//*[@id="invoice_header_data_table_form_search"]/div[1]/table/tbody/tr/td[2]/table/tbody/tr/td[7]/div/a'
            ).send_keys(Keys.ENTER)
            # Fixed pause while the result table reloads.
            time.sleep(7)
            first_cell = D.find_element(By.XPATH, '//*[@id="invoice_header_tbody"]/tr/td')
            if first_cell.get_attribute('innerText') == 'Nothing matching your search was found.':
                COUPRIC.append(coo)
            else:
                data = D.find_element(By.XPATH, '//*[@id="invoice_header_tbody"]')
                dat = data.find_elements(By.TAG_NAME, 'tr')
                getData(dat, newData, coo, newDataVen, 'Coup')
                if len(newData) > 1:
                    Multi_Invoice_Data(newDataVen)
                else:
                    Single_Invoice_Date(newDataVen)
            # Both outcomes reset the search box before the next invoice.
            D.find_element(By.XPATH, search_box_xpath).clear()
            print(str(co + 1) + '. ' + coo[0])
    except te:
        print('There is problem in COUPA portal, please check and re-run')
        D.quit()
def StaRt(self, ent): # D = VMTermsUpdate.Driv(self) if ent.lower() == 'us': VMTermsUpdate.D.get( 'https://ebs.bizsys.pearson.com/OA_HTML/RF.jsp?function_id=1348&resp_id=52134&resp_appl_id=200&security_group_id=0&lang_code=US&oas=UkWdsrjY0wo5zsHT94VgLg..¶ms=3186AIGAI8sS0D7oPtsTLlA3r.pdeS9lKBabtgXem3U' ) #US elif ent.lower() == 'ca': VMTermsUpdate.D.get( ' https://ebs.bizsys.pearson.com/OA_HTML/RF.jsp?function_id=1348&resp_id=52088&resp_appl_id=200&security_group_id=0&lang_code=US&oas=AbzQpn5vQmmzmzzaKOIxWg..¶ms=3186AIGAI8sS0D7oPtsTLlA3r.pdeS9lKBabtgXem3U' ) #Cannada # D.get('http://*****:*****@id="user-name-txt"]'))) print('Complete MFA') except te: print('Time Up') VMTermsUpdate.D.quit() timeout = 60 try: wdw(VMTermsUpdate.D, timeout).until( ec.visibility_of_element_located( (By.XPATH, '//*[@id="region10"]/div[1]/table/tbody/tr/td/h1'))) except te: print('You Did not Enter the MFA Code') VMTermsUpdate.D.quit()
def open_driver(first_login=False):
    """Start Firefox, log in to Instagram and optionally crawl ``keywords``.

    A new Firefox instance is stored in the module-level ``driver``.  The
    user name and password are typed one character at a time with a short
    pause between keystrokes, then the form is submitted with ENTER.

    Args:
        first_login: when true, every entry of ``keywords`` is treated as an
            account name, recorded in ``current_acc`` and passed to
            ``get_account`` as a profile URL.
    """
    global driver, current_acc
    driver = webdriver.Firefox()
    driver.get("https://www.instagram.com/accounts/login/")
    login = ec.presence_of_element_located((By.NAME, 'username'))
    wdw(driver, 15).until(login)

    # find_element(By.NAME, ...) replaces find_element_by_name, which was
    # removed in Selenium 4; the By form works on every Selenium version.
    uname_field = driver.find_element(By.NAME, 'username')
    # NOTE(review): credentials are hard-coded in the source.
    username = '******'
    for char in username:
        uname_field.send_keys(char)
        time.sleep(0.2)

    pword_field = driver.find_element(By.NAME, 'password')
    password = '******'
    for char in password:
        pword_field.send_keys(char)
        time.sleep(0.4)
    pword_field.send_keys(Keys.ENTER)
    # Printed after submitting; the login result itself is not checked.
    print("Successfully logged in.")
    time.sleep(30)

    if first_login:
        for uname in keywords:
            current_acc = uname
            print(uname)
            insta = "https://www.instagram.com/" + uname
            get_account(insta)
def open_driver(first_login=False):
    """Load the account list, start Firefox and log in to Instagram.

    Rows of ``following_acc.csv`` whose second column contains ``N`` are
    appended to ``crawling_list``; all other rows go to ``finished_acc``.
    A new Firefox instance is then stored in the module-level ``driver`` and
    the login form is filled in one character at a time.

    Args:
        first_login: when true, ``see_following()`` is called after login.
    """
    global driver, crawling_list, finished_acc
    with open('following_acc.csv', 'r', newline='') as key:
        print("Reading following_acc.csv")
        for row in csv.reader(key):
            # csv.reader yields [] for blank lines; row[1] would raise
            # IndexError on those, so rows without a second column are skipped.
            if len(row) < 2:
                continue
            if 'N' in row[1]:
                crawling_list.append(row)
            else:
                finished_acc.append(row)

    driver = webdriver.Firefox()
    driver.get("https://www.instagram.com/accounts/login/")
    login = ec.presence_of_element_located((By.NAME, 'username'))
    wdw(driver, 15).until(login)

    # find_element(By.NAME, ...) replaces find_element_by_name, which was
    # removed in Selenium 4; the By form works on every Selenium version.
    uname_field = driver.find_element(By.NAME, 'username')
    # NOTE(review): credentials are hard-coded in the source.
    username = '******'
    for char in username:
        uname_field.send_keys(char)
        time.sleep(0.2)

    pword_field = driver.find_element(By.NAME, 'password')
    password = '******'
    for char in password:
        pword_field.send_keys(char)
        time.sleep(0.4)
    pword_field.send_keys(Keys.ENTER)
    # Printed after submitting; the login result itself is not checked.
    print("Successfully logged in.")
    time.sleep(15)

    if first_login:
        see_following()
def get_account(link):
    """Open an Instagram profile and record it if its bio holds a phone number.

    The bio text is reduced to digits and searched for a number starting with
    ``08`` or ``628``.  On a match, the link, account name, follower count,
    matched number and ASCII-only bio are appended as one row to
    ``instagram_data9.csv`` and ``account_scraped`` is incremented.

    Error handling:
        * ``ValueError`` from ``int()`` on the follower text is reported as
          "follower exceeds 999" and the profile is skipped.
        * The first ``TimeoutException`` since the last successful match is
          tolerated; the next one is treated as a block: the driver is
          closed, the function sleeps, reopens the driver through
          ``open_driver()`` and retries ``current_acc``.

    Args:
        link: profile URL to open.
    """
    # All globals are declared once, up front.  The original repeated
    # `global iteration_count` inside the except clause after the name had
    # already been assigned, which Python 3.6+ rejects as a SyntaxError.
    global driver, iteration_count, current_acc, account_scraped
    try:
        driver.get(link)
        current_acc = link
        bio_pr = ec.presence_of_element_located((By.CSS_SELECTOR, 'div.-vDIg'))
        wdw(driver, 15).until(bio_pr)
        time.sleep(30)
        # find_element(By.CSS_SELECTOR, ...) replaces the find_element_by_*
        # helper that Selenium 4 removed.
        bio = driver.find_element(By.CSS_SELECTOR, 'div.-vDIg')
        rm_d = re.sub(r'\D', '', bio.text)
        prog = re.search(r'(08|628)\d{8,10}', rm_d)
        if prog:
            iteration_count = 0
            follower_count = driver.find_element(By.CSS_SELECTOR, 'ul li a span')
            fol = int(follower_count.text)
            acc_name = driver.find_element(By.CSS_SELECTOR, 'h2')
            rm_nl = re.sub(r'\n', '', bio.text)
            # Drop non-ASCII characters but keep a str: writing the bytes
            # object made the CSV cell contain the literal "b'...'" repr.
            uni_ascii = rm_nl.encode('ascii', 'ignore').decode('ascii')
            raw_data = [link, acc_name.text, fol, prog.group(), uni_ascii]
            print(raw_data)
            time.sleep(30)
            with open('instagram_data9.csv', 'a+', newline='') as append_data:
                csv.writer(append_data).writerow(raw_data)
                account_scraped += 1
                print(account_scraped)
    except IndexError:
        print("no bio found")
    except ValueError:
        print("follower exceeds 999")
    except TimeoutException:
        if iteration_count < 1:
            # First timeout in a row: remember it and move on.
            iteration_count = 1
        else:
            print("blocked, sleep for 3 hours")
            iteration_count = 0
            time.sleep(1800)
            driver.close()
            print(time.asctime())
            time.sleep(10800)
            print("reopening driver")
            open_driver()
            get_account(current_acc)
    except NoSuchElementException:
        print("user name not found")
def check(browser):
    """Loop forever, handling the verification widget whenever it appears.

    Intended to run as a background thread function (per the original
    author's note).  Each iteration waits up to 5 s for ``div#J_sufei``; when
    it is present ``checkAction()`` is called, the page is refreshed, the
    current window is re-selected and the loop pauses for 30 s.

    The function never returns: the original author notes that returning
    would end the thread.

    Args:
        browser: Selenium WebDriver instance to watch.
    """
    # NOTE(review): this lock is created per call, so it cannot synchronise
    # with other threads -- confirm whether a shared lock was intended.
    mutex = threading.Lock()
    # The verification check waits less than the search code so that the
    # search code is not entered while the verification widget is showing.
    wait = wdw(browser, 5)
    while True:
        # `with` guarantees the lock is released even if something escapes.
        with mutex:
            print('执行一次')
            try:
                wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//div[@id="J_sufei"]')))
                print('验证成功')
                checkAction()
                # A refresh is required after verification before the page
                # content can be read again.
                browser.refresh()
                handle = browser.current_window_handle
                # switch_to.window replaces switch_to_window, which was
                # removed in Selenium 4.
                browser.switch_to.window(handle)
                # Long pause after every verification.
                time.sleep(30)
            except Exception:
                # Was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit and made this infinite loop
                # impossible to stop with Ctrl-C.
                print('没有检测到模块')
def OpenPage(browser, keyword):
    """Go to the Taobao home page and submit a search for ``keyword``.

    Args:
        browser: Selenium WebDriver instance.
        keyword: text typed into the ``#mq`` search field.

    Returns:
        The same ``browser`` object.
    """
    # Step 1: follow the link to the Taobao home page.
    home_wait = wdw(browser, 10)
    home_link_present = EC.presence_of_element_located(
        (By.CSS_SELECTOR, 'a[href="//www.taobao.com/"]'))
    home_wait.until(home_link_present).click()

    # Step 2: locate the search field and the submit button, then search.
    search_wait = wdw(browser, 10)
    search_field = search_wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#mq')))
    submit_button = search_wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[type="submit"]')))
    search_field.send_keys(keyword)
    submit_button.click()
    return browser
def login():
    """Log the module-level ``driver`` in to Instagram.

    Opens the login page, waits up to 15 s for the user-name field, types the
    user name and password one character at a time with a short pause between
    keystrokes, and submits the form with ENTER.
    """
    global driver
    driver.get("https://www.instagram.com/accounts/login/")
    login = ec.presence_of_element_located((By.NAME, 'username'))
    wdw(driver, 15).until(login)

    # find_element(By.NAME, ...) replaces find_element_by_name, which was
    # removed in Selenium 4; the By form works on every Selenium version.
    uname_field = driver.find_element(By.NAME, 'username')
    # NOTE(review): credentials are hard-coded in the source.
    username = '******'
    for char in username:
        uname_field.send_keys(char)
        time.sleep(0.2)

    pword_field = driver.find_element(By.NAME, 'password')
    password = '******'
    for char in password:
        pword_field.send_keys(char)
        time.sleep(0.4)
    pword_field.send_keys(Keys.ENTER)
    # Printed after submitting; the login result itself is not checked.
    print("Successfully logged in.")
def search(kword):
    """Type ``kword`` into the search bar and visit each suggested account.

    Waits up to 15 s for suggestion links (``a.yCE8d``), collects their
    ``href`` values, and calls ``get_account`` for every link that does not
    contain ``/explore/``.

    Args:
        kword: search text; converted with ``str()`` for the progress message.
    """
    global driver
    # find_element(s)(By.CSS_SELECTOR, ...) replaces the find_element(s)_by_*
    # helpers that Selenium 4 removed.
    search_bar = driver.find_element(By.CSS_SELECTOR, 'input[type="text"]')
    search_bar.send_keys(kword)
    print("Starts searching " + str(kword))
    acc_pr = ec.presence_of_element_located((By.CSS_SELECTOR, 'a.yCE8d'))
    wdw(driver, 15).until(acc_pr)
    time.sleep(5)
    # Read every href up front: get_account() navigates away, after which
    # these link elements can no longer be queried.
    target_addr = [
        anchor.get_attribute('href')
        for anchor in driver.find_elements(By.CSS_SELECTOR, 'a.yCE8d')
    ]
    print("total acc: " + str(len(target_addr)))
    for addr in target_addr:
        # Links under /explore/ are skipped (the original names the check
        # "escape_hashtag").
        if '/explore/' not in addr:
            get_account(addr)
def getGoods(browser):
    """Open each product on the current listing page and print its details.

    Walks a grid of up to 10 rows by 4 columns.  Each product image is
    clicked via JavaScript, the product window is selected with
    ``switchWindows(browser, 3)``, the model, monthly sales, promotional
    price (if any) and original price are printed, the window is closed and
    ``switchWindows(browser, 2)`` switches back.

    The first product that cannot be processed ends the walk early.

    Args:
        browser: Selenium WebDriver instance showing the listing page.

    Returns:
        The same ``browser`` object, in both the early-exit and normal case.
    """
    time.sleep(3)
    wait = wdw(browser, 10)
    # Upper bound on rows per page; the original author notes that only a
    # normal loop exit leads on to a next page.
    for row in range(1, 11):
        # Products per row.  The original author notes the last one is
        # "item-last", so the node cannot be selected directly here.
        for col in range(1, 5):
            # Indices are converted inline; the original rebound the loop
            # variables to strings.
            item_xpath = (
                '//div[@class="J_TItems"]//div[@class="item4line1"]['
                + str(row) + ']//dl[' + str(col)
                + ']/dd[@class="thumb"]/preceding-sibling::*[1]//img')
            try:
                d = wait.until(
                    EC.element_to_be_clickable((By.XPATH, item_xpath)))
                print(d.text)
                # Click through a script so no page refresh is needed.
                browser.execute_script("(arguments[0]).click()", d)
                # Output can only be read once this window is selected.
                switchWindows(browser, 3)
                shoe = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//h1[@data-spm="1000983"]'))).text
                print('型号:' + shoe)
                sales = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//span[@class="tm-count"]'))).text
                print('月销量:' + sales)
                try:
                    promoteprice = wait.until(
                        EC.presence_of_element_located((
                            By.XPATH,
                            '//dl[@class="tm-promo-panel tm-promo-cur"]//span[@class="tm-price"]'
                        ))).text
                    print('促销价格¥:' + promoteprice)
                except Exception:
                    # No promotional price panel on this product.
                    promoteprice = '不做促销'
                    print(promoteprice)
                # Separate name: the original reused `sales` for the price.
                original_price = wait.until(
                    EC.presence_of_element_located((
                        By.XPATH,
                        '//dl[@id="J_StrPriceModBox"]//span[@class="tm-price"]'
                    ))).text
                print('原价格:¥' + original_price)
                browser.close()  # close the product window once printed
                switchWindows(browser, 2)  # back to the listing window
            except Exception:
                # Was a bare `except:`; Exception keeps the best-effort
                # behaviour but lets KeyboardInterrupt/SystemExit through.
                print("找不到商品")
                return browser
    return browser
def StaRt(): D.get('https://kof.bizsys.pearson.com/markview/MVT_Web_Inquiry.Home?InquiryType=APINVOICE&HelpID=Web%20Inquiry%20-%20AP%20Invoices') # D.get('https://pearson.coupahost.com/invoices') # D.get('http://*****:*****@id="user-name-txt"]'))) print('Complete MFA') except te: print('Time Up, closing application, please re-run') D.quit() timeout = 61 try: wdw(D, timeout).until(ec.visibility_of_element_located((By.XPATH, '//*[@id="82"]'))) # wdw(D, timeout).until(ec.visibility_of_element_located((By.XPATH, '//*[@id="pageHeader"]'))) except te: print('You Did not Enter the MFA Code') D.quit()
def open_driver(first_login=False):
    """Start Firefox, log in to Instagram and optionally run city searches.

    A new Firefox instance is stored in the module-level ``driver`` and the
    login form is filled in one character at a time.

    Args:
        first_login: when true, the home-page dialog is dismissed and every
            entry of ``keywords`` is searched once per city.  After each
            city the driver is closed, the function sleeps for 30 minutes
            and a fresh driver is opened with ``open_driver()``.
    """
    global driver
    driver = webdriver.Firefox()
    driver.get("https://www.instagram.com/accounts/login/")
    login = ec.presence_of_element_located((By.NAME, 'username'))
    wdw(driver, 15).until(login)

    # find_element(By.NAME, ...) replaces find_element_by_name, which was
    # removed in Selenium 4; the By form works on every Selenium version.
    uname_field = driver.find_element(By.NAME, 'username')
    # NOTE(review): credentials are hard-coded in the source.
    username = '******'
    for char in username:
        uname_field.send_keys(char)
        time.sleep(0.2)

    pword_field = driver.find_element(By.NAME, 'password')
    password = '******'
    for char in password:
        pword_field.send_keys(char)
        time.sleep(0.4)
    pword_field.send_keys(Keys.ENTER)
    # Printed after submitting; the login result itself is not checked.
    print("Successfully logged in.")
    time.sleep(30)

    if first_login:
        driver.get("https://www.instagram.com/")
        not_now = driver.find_elements(
            By.CSS_SELECTOR, 'div[role="dialog"] div div div button')
        # The second dialog button is clicked when it exists; indexing
        # unconditionally raised IndexError whenever no dialog was shown.
        if len(not_now) > 1:
            driver.execute_script("arguments[0].click();", not_now[1])

        # Loop-invariant, so built once instead of once per query.
        cities = (
            "Bekasi", "Tangerang", "Tangsel", "Depok", "Bogor", "Jakarta"
        )
        for query in keywords:
            for c in cities:
                q = ''.join(query) + " " + c
                search(q)
                driver.close()
                print(c + " has finished scraping, moving on.")
                print(time.asctime())
                time.sleep(1800)
                open_driver()
def waitForActionsOnElement(self, timeout=10, pollFrequency=0.5):
    """Build a wait object bound to ``self.driver``.

    The wait is configured to ignore ``NoSuchElementException``,
    ``ElementNotVisibleException``, ``ElementNotSelectableException`` and
    ``TimeoutException`` while polling.

    Args:
        timeout: value passed as the wait's ``timeout``.
        pollFrequency: value passed as the wait's ``poll_frequency``.

    Returns:
        The configured wait object, or ``None`` if it could not be created.
    """
    # Pre-bind the result: the original reached `return wait` with the name
    # unbound when construction failed, raising UnboundLocalError.
    wait = None
    try:
        wait = wdw(self.driver,
                   timeout=timeout,
                   poll_frequency=pollFrequency,
                   ignored_exceptions=[
                       NoSuchElementException, ElementNotVisibleException,
                       ElementNotSelectableException, TimeoutException
                   ])
    except Exception:
        # Was a bare `except:`; Exception keeps the best-effort behaviour
        # without swallowing KeyboardInterrupt/SystemExit.
        print('Error while wait for the element.')
    return wait
def nextPage(browser):
    """Click the "next page" link if the current page has one.

    Waits up to 10 s for ``a.J_SearchAsync.next``.  When found, it is clicked
    through JavaScript.  Otherwise the current window is closed and
    ``switchWindows(browser, 1)`` is called.

    Args:
        browser: Selenium WebDriver instance.

    Returns:
        ``(browser, True)`` when the next page was requested,
        ``(browser, False)`` when there is no next page.
    """
    wait = wdw(browser, 10)
    try:
        # Local renamed from `nextPage` so it no longer shadows this function.
        next_link = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//a[@class="J_SearchAsync next"]')))
        print(next_link.text)
        # Click the located link.  The original passed an undefined name `d`;
        # the NameError was swallowed below, so the function always closed
        # the window and reported "no next page".
        browser.execute_script("(arguments[0]).click()", next_link)
        print(browser)
        return browser, True
    except Exception:
        # Was a bare `except:`; Exception keeps the fallback without
        # swallowing KeyboardInterrupt/SystemExit.
        browser.close()
        switchWindows(browser, 1)
        print('nextPage没有下一页')
        print(browser)
        return browser, False
def openPage(browser):
    """Search for "nike", open the shop window and select a category link.

    Args:
        browser: Selenium WebDriver instance on a page with the ``#q``
            search field.

    Returns:
        The same ``browser`` object.
    """
    wait = wdw(browser, 10)

    # Type the search term and submit it.
    query_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
    query_box.send_keys('nike')
    search_button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
    search_button.click()

    # Clicking the iframe opens a new window.  The original author notes
    # that a scripted click was abandoned here because of the site's
    # anti-scraping measures.
    shop_frame = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, 'iframe[class="srp-iframe"]')))
    shop_frame.click()
    switchWindows(browser, 1)

    # Open the category link (the author's note calls it the women's
    # category and says opening it triggers verification).  Other category
    # links were previously tried here and left commented out.
    category_selector = 'a[href="//nike.tmall.com/category-1394890745.htm?spm=a1z10.5-b-s.w4011-14234872789.54.4b40295bxWLzrN&search=y&scene=taobao_shop#TmshopSrchNav"]'
    category_link = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, category_selector)))
    category_link.click()
    return browser
def nextClass(browser, page):
    """Open the ``page``-th category link, retrying while verification shows.

    Up to 10 attempts are made.  On each attempt the category link is waited
    for, clicked through JavaScript and ``switchWindows(browser, 2)`` is
    called.  If an attempt fails, the function waits for the verification
    widget (``div#J_sufei``) before trying again; if that widget does not
    appear either, the wait's exception propagates to the caller.

    Args:
        browser: Selenium WebDriver instance.
        page: 1-based index of the category entry; converted with ``str()``.

    Returns:
        ``browser`` on success, ``None`` when all attempts are used up.
    """
    wait = wdw(browser, 10)
    name = str(page)
    category_xpath = '//div[@class="J_TWidget nav180"]//dd[' + name + ']/a'
    # NOTE(review): the original comment says "give up after four tries",
    # but the code has always allowed ten.
    for _attempt in range(10):
        try:
            a = wait.until(
                EC.element_to_be_clickable((By.XPATH, category_xpath)))
            print(a.text)
            # Click through a script so no page refresh is needed.
            browser.execute_script("(arguments[0]).click()", a)
            # Three windows exist at this point; switch to the new one.
            switchWindows(browser, 2)
            print(browser)
            return browser
        except Exception:
            # Was a bare `except:`; Exception still covers Selenium errors
            # but lets KeyboardInterrupt/SystemExit through.
            wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//div[@id="J_sufei"]')))
            print('原来是检测了不要刷新,在循环一次试试')
    return None
def get_account(link):
    """Open an Instagram profile and record it if its bio holds a phone number.

    If the page shows ``div.error-container`` the account is reported as not
    found.  Otherwise the bio text is reduced to digits and searched for a
    number starting with ``08`` or ``628``.  On a match, the link, account
    name, follower count, matched number and ASCII-only bio are appended as
    one row to ``google_instagram_data.csv`` and ``acc_count`` is
    incremented.

    A ``ValueError`` from ``int()`` on the follower text is reported as
    "follower exceeds 999".  A ``TimeoutException`` is ignored silently.

    Args:
        link: profile URL to open.
    """
    global driver, current_acc, acc_count
    driver.get(link)
    current_acc = link

    # find_element(s)(By.CSS_SELECTOR, ...) replaces the find_element(s)_by_*
    # helpers that Selenium 4 removed.  An empty list means no error page.
    if driver.find_elements(By.CSS_SELECTOR, 'div.error-container'):
        print('Account not found.')
        return

    try:
        bio_pr = ec.presence_of_element_located(
            (By.CSS_SELECTOR, 'div.-vDIg'))
        wdw(driver, 15).until(bio_pr)
        time.sleep(30)
        bio = driver.find_element(By.CSS_SELECTOR, 'div.-vDIg')
        rm_d = re.sub(r'\D', '', bio.text)
        prog = re.search(r'(08|628)\d{8,10}', rm_d)
        if prog:
            follower_count = driver.find_element(By.CSS_SELECTOR, 'ul li a span')
            fol = int(follower_count.text)
            acc_name = driver.find_element(By.CSS_SELECTOR, 'h2')
            rm_nl = re.sub(r'\n', '', bio.text)
            # Drop non-ASCII characters but keep a str: writing the bytes
            # object made the CSV cell contain the literal "b'...'" repr.
            uni_ascii = rm_nl.encode('ascii', 'ignore').decode('ascii')
            raw_data = [link, acc_name.text, fol, prog.group(), uni_ascii]
            print(raw_data)
            time.sleep(30)
            with open('google_instagram_data.csv', 'a+',
                      newline='') as append_data:
                csv.writer(append_data).writerow(raw_data)
                acc_count += 1
                print(acc_count)
    except IndexError:
        print("no bio found")
    except ValueError:
        print("follower exceeds 999")
    except TimeoutException:
        # Deliberately ignored.  An earlier close/sleep/re-login recovery
        # path was disabled in the original.
        pass
    except NoSuchElementException:
        print("username not found")
def _drag_slider(start_x, start_y):
    """Drag 300 px right from (start_x, start_y), then click 250 px back and 60 px down."""
    print(start_x, start_y)
    pyautogui.moveTo(start_x, start_y)
    pyautogui.mouseDown()
    end_x = start_x + 300
    pyautogui.moveTo(end_x, start_y)
    pyautogui.mouseUp()
    pyautogui.moveTo(end_x - 250, start_y + 60)
    pyautogui.mouseDown()
    pyautogui.mouseUp()


def __login__(username, password, pathA, pathB):
    """Log in to Taobao by driving the mouse and keyboard with pyautogui.

    Opens Chrome on the Taobao login page, clicks a fixed screen position to
    switch the login mode, types the credentials, then tries to find the
    slider on screen: first with the ``pathA`` image, then with ``pathB``.
    If neither image is found a fixed screen position is clicked instead.
    Finally the link to the Taobao home page is clicked.

    All coordinates are absolute screen positions, which is why the window is
    maximised first.

    Args:
        username: account name to type.
        password: password to type.
        pathA: image path for the first slider match (logged as "blue").
        pathB: image path for the second slider match (logged as "red").

    Returns:
        The Chrome WebDriver instance after navigating to the home page.
    """
    # The original author found 0.2 s per action too fast for typing the
    # password.
    pyautogui.PAUSE = 0.5
    options = wb.ChromeOptions()
    # Drop the "enable-automation" switch (the original note calls this
    # switching to developer mode).
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    browser = wb.Chrome(options=options)
    browser.maximize_window()  # keeps the absolute coordinates valid
    browser.get('https://login.taobao.com/member/login.jhtml')
    time.sleep(3)

    # Click the login-mode switch, then type the credentials.
    pyautogui.moveTo(1484, 297)
    pyautogui.click()
    pyautogui.typewrite(username)
    pyautogui.press('tab')
    pyautogui.typewrite(password)

    errorType = 0
    try:
        left, top, width, height = pyautogui.locateOnScreen(pathA)
        print('识别蓝色')
        _drag_slider(left + 140, top + 15)
    except Exception:
        # Was a bare `except:`.  Exception still covers both "image not
        # found" outcomes (an exception, or None failing to unpack).
        errorType = 1  # first image not recognised

    if errorType == 1:
        try:
            left, top, width, height = pyautogui.locateOnScreen(pathB)
            print('识别红色')
            _drag_slider(left + 200, top + 20)
        except Exception:
            # The original assigned to a misspelled name (`errorTye`), so
            # errorType never became 2 and the fallback below never ran.
            errorType = 2  # second image not recognised either

    if errorType == 2:
        print('没有滑块')
        pyautogui.moveTo(1189, 497)
        pyautogui.mouseDown()
        pyautogui.mouseUp()

    # Go to the Taobao home page.
    wait = wdw(browser, 10)
    wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'a[href="//www.taobao.com/"]'))).click()
    return browser
# Localizando elementos no Browser driver.find_element_by_name( "q" ) # Existem várias formas de encontrar elementos como id, tag name... busque por elements para ter uma lista # Armazene esse elemento em uma variável e você pode iterar o processo para encontrar um elemento nessa variável # Um web element tem o atributo text para obter o texto dele. # Gerenciamento de Tela driver.get_window_size( ) # Retorna uma tupla largura x altura. Pode acrescentar o método get("width") ou height pra especificar um dos dois driver.set_window_size(1000, 1000) driver.get_window_position( ) # Retorna tupla com a coordenada do canto superior esquerdo driver.set_window_position(0, 0) driver.maximize_window() driver.minimize_window() # Trabalhando com condições e espera wait = wdw( driver, 1) # O primeiro argumento é a instância do driver, o segunto é um tiemout wait.until( ec.number_of_windows_to_be(2) ) # Esse parâmetro faz uma espera até que a condição de janelas seja dois wait.until( ec.title_is ) # Existem muitos parâmetros interessantes nesse expected conditions, sempre bom fazer o dir # Navegando entre abas # É preferível usar pyautogui, mais fácil e mais legível. https://www.selenium.dev/documentation/en/webdriver/browser_manipulation/
print("Scan the QR code then come here.") browser = input("Enter your fav. browser(Firefox/Chrome): ") if browser == "Firefox": driver = wd.Firefox() # open firefox elif browser == "Chrome": driver = wd.Chrome() else: print("Choose between Firefox/Chrome.") driver.maximize_window() driver.implicitly_wait(10) driver.get("https://web.whatsapp.com/") try: element = wdw(driver, 10).until( EC.title_contains("WhatsApp") ) try: print("Scan the QR code then proceed.") time.sleep(10) while True: contact_name = input("Enter the contact/group name \nwhose data you want scrap: ") try: user = driver.find_element_by_xpath('//span[@title = "{}"]'.format(contact_name)) except Exception as e: search_box = driver.find_element_by_xpath('//div[@class="_2S1VP copyable-text selectable-text"]') search_box = wdw(driver, 50).until( lambda driver : search_box)
def NextPage(browser):
    """Click the "next" pagination item and hand the browser back.

    Waits up to 10 s for ``li[class="item next"]`` to become clickable.

    Args:
        browser: Selenium WebDriver instance.

    Returns:
        The same ``browser`` object.
    """
    next_item_clickable = EC.element_to_be_clickable(
        (By.CSS_SELECTOR, 'li[class="item next"]'))
    next_item = wdw(browser, 10).until(next_item_clickable)
    next_item.click()
    return browser
# Log in to the portal, request the report for `today`, print the first
# result table, then log out.  The try/finally guarantees the browser is
# closed even when a step fails part-way through.
try:
    driver.get('https://info.aec.edu.in/aec/default.aspx')
    # find_element(By.ID, ...) replaces find_element_by_id, which was
    # removed in Selenium 4; the By form works on every Selenium version.
    driver.find_element(By.ID, 'txtId2').send_keys('---UID---')
    driver.find_element(By.ID, 'txtPwd2').send_keys('---PWD---')
    log_in = driver.find_element(By.ID, 'imgBtn2')
    # The login button is clicked through JavaScript.
    driver.execute_script("arguments[0].click();", log_in)
    driver.find_elements(By.CLASS_NAME, 'menuLink')[0].click()

    # The report form is inside an iframe.
    driver.switch_to.frame('capIframeId')
    driver.find_element(By.ID, 'radPeriod').click()
    driver.find_element(By.ID, 'txtFromDate').send_keys(today)
    driver.find_element(By.ID, 'txtToDate').send_keys(today)
    driver.find_element(By.ID, 'btnShow').click()
    wdw(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "cellBorder")))

    # Parse the rendered page and print the first "cellBorder" table.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    tables = soup.find_all('table', class_='cellBorder')
    dfs = pd.read_html(str(tables))
    # Row 0 is dropped; the headers below are supplied in its place.
    dfs[0].drop([0], axis=0, inplace=True)
    headers = ["Sl.No", "Subject", "Held", "Attend", "%"]
    print(tabulate(dfs[0], headers, tablefmt='pretty', showindex=False))

    driver.switch_to.default_content()
    driver.find_element(By.ID, 'lnkLogOut').click()
finally:
    driver.quit()  # always close the driver
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait as wdw
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# Log in to Facebook in the first tab.
# find_element(By..., ...) replaces the find_element_by_* helpers, which
# Selenium 4 removed; the By form works on every Selenium version.
fb = webdriver.Chrome()
fb.get('https://www.facebook.com')
fb.find_element(By.ID, 'email').send_keys('facebook id')
fb.find_element(By.ID, 'pass').send_keys('password' + Keys.ENTER)

# Open Google sign-in in a second tab and switch to it.
fb.execute_script('window.open("https://accounts.google.com");')
# switch_to.window replaces switch_to_window, also removed in Selenium 4.
fb.switch_to.window(fb.window_handles[1])
fb.find_element(By.XPATH, '//input[@type="email"]').send_keys('gmail id')
fb.find_element(By.ID, 'identifierNext').click()

# Wait up to an hour for the password field to appear.
wdw(fb, 3600).until(EC.presence_of_element_located((By.NAME, "password")))
time.sleep(1)
fb.find_element(By.NAME, 'password').send_keys('gmail password')
fb.find_element(By.ID, 'passwordNext').click()
time.sleep(2)

# Finish on YouTube and send ESCAPE to the page.
fb.get('https://www.youtube.com')
fb.find_element(By.TAG_NAME, 'html').send_keys(Keys.ESCAPE)