def mailru_collector():
    '''
    Log in to Mail.ru, collect the link of every letter shown in the
    mailbox list and upsert each letter into MongoDB.

    The credentials come from ``get_login()`` / ``get_password()``.  Each
    letter is stored in ``client['Sends_from_mailru'].sends_in_db`` as a
    document with the keys ``text``, ``title``, ``from``, ``date`` and
    ``url``; ``url`` is the upsert key, so re-running the collector updates
    existing documents instead of duplicating them.

    The browser session is always shut down, even when a step fails.
    '''
    driver = Safari()
    try:
        driver.maximize_window()
        db = client['Sends_from_mailru']
        sends_in_db = db.sends_in_db

        # --- log in -------------------------------------------------------
        driver.get('https://mail.ru')
        elem = driver.find_element_by_id('mailbox:login-input')
        elem.send_keys(get_login())
        elem.submit()
        # Pass a locator (not an already-found element) so the wait itself
        # covers the time the password field needs to appear.
        elem = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.ID, 'mailbox:password-input')))
        elem.send_keys(get_password())
        elem.submit()
        WebDriverWait(driver, 10).until(
            EC.title_contains('- Почта Mail.ru'))

        # --- collect letter links ----------------------------------------
        sends_block = driver.find_element_by_xpath(
            "//div[@class='dataset__items']")
        sends = set()
        while True:
            known_before = len(sends)
            list_sends = sends_block.find_elements_by_xpath(".//a[@href]")
            for send in list_sends:
                sends.add(send.get_attribute('href'))
            # No new links after a scroll: the whole list has been seen.
            if len(sends) == known_before:
                break
            # Reading this property scrolls the last link into view, which
            # makes the page load the next batch of letters.
            _ = list_sends[-1].location_once_scrolled_into_view
            time.sleep(1)

        # --- visit every letter and store it -----------------------------
        for send_url in sends:
            driver.get(send_url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[@class='letter-body']")))
            send_dict = {
                'text': driver.find_element_by_xpath(
                    "//div[contains(@class, 'letter-body')]").text,
                'title': driver.find_element_by_tag_name('h2').text,
                'from': driver.find_element_by_class_name(
                    'letter-contact').text,
                'date': driver.find_element_by_class_name(
                    'letter__date').text,
                'url': send_url,
            }
            sends_in_db.update_one({'url': send_url},
                                   {'$set': send_dict},
                                   upsert=True)
    finally:
        # quit() closes the window and ends the WebDriver session, so the
        # browser is not left running when an exception interrupts the run.
        driver.quit()
def test_safari():
    '''
    Smoke-test the Safari WebDriver: print the attributes of the driver
    class and of the ``safari`` package, then open DuckDuckGo.

    A selenium driver exists for Safari, but as far as the author could
    tell it is not headless.
    '''
    from selenium.webdriver import Safari
    driver_class_members = dir(Safari)
    print(driver_class_members, '\n')

    from selenium.webdriver import safari
    package_members = dir(safari)
    print(package_members)

    session = Safari()
    session.get('https://duckduckgo.com')
def mvideo_hits_collector():
    '''
    Scrape the "Хиты продаж" (best sellers) gallery on mvideo.ru and upsert
    every product into MongoDB.

    The gallery is paged forward with its "next" button until the button's
    class contains ``disable``; afterwards every product link in the
    gallery is read.  Each product is stored in
    ``client['Goods_from_hits_mvideo'].goods_in_db`` with the keys ``url``,
    ``name``, ``price`` and ``category``; ``url`` is the upsert key.

    Raises:
        LookupError: if no gallery titled 'Хиты продаж' is on the page.

    The browser session is always shut down, even when a step fails.
    '''
    driver = Safari()
    try:
        driver.maximize_window()
        db = client['Goods_from_hits_mvideo']
        goods_in_db = db.goods_in_db
        driver.get('https://www.mvideo.ru')

        # Locate the section that wraps the best-sellers gallery.  As in
        # the original loop, the last matching title wins.
        hits_block = None
        gallery_titles = driver.find_elements_by_xpath(
            "//div[@class='gallery-title-wrapper']")
        for gal_title in gallery_titles:
            if 'Хиты продаж' in gal_title.text:
                hits_block = gal_title.find_element_by_xpath(
                    "./ancestor::div[@class='section']")
        if hits_block is None:
            # Previously this surfaced as an UnboundLocalError further down.
            raise LookupError(
                "gallery 'Хиты продаж' was not found on the page")

        next_button = hits_block.find_element_by_xpath(
            ".//a[contains(@class, 'next-btn')]")
        # Page through the carousel so every product gets rendered.
        while True:
            next_button.click()
            WebDriverWait(hits_block, 10).until(
                EC.element_to_be_clickable((By.XPATH, ".//li//h4/a")))
            WebDriverWait(hits_block, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH, ".//a[contains(@class, 'next-btn')]")))
            # get_attribute() returns None when the attribute is missing.
            if 'disable' in (next_button.get_attribute('class') or ''):
                break

        goods_list_html = hits_block.find_elements_by_xpath(".//li//h4/a")
        for good in goods_list_html:
            good_url = good.get_attribute('href')
            # Product details are embedded as JSON in a data attribute.
            good_desc = json.loads(good.get_attribute('data-product-info'))
            good_dict = {
                'url': good_url,
                'name': good_desc['productName'],
                'price': float(good_desc['productPriceLocal']),
                'category': good_desc['productCategoryName'],
            }
            goods_in_db.update_one({'url': good_url},
                                   {'$set': good_dict},
                                   upsert=True)
    finally:
        # quit() closes the window and ends the WebDriver session, so the
        # browser is not left running when an exception interrupts the run.
        driver.quit()
elif (question.text.rstrip())=="CGPA": print("answering",question.text ) ans=person.find_element_by_xpath('.//input[@class="quantumWizTextinputPaperinputInput exportInput"]') ans.send_keys(currentCgpa) else: print("answer not found for", question.text) continue driver = Safari() driver.get("https://docs.google.com/forms/d/e/1FAIpQLSc5NC4HJX6hf8QqLhixRLwtGMisa4MYVmLV5ixPqYUIYJNHnw/viewform?usp=sf_link") driver.implicitly_wait(7000) def checkform(): try: assert "https://docs.google.com/forms/" in driver.current_url return True except: print("match not found") return False
from selenium.webdriver import Safari

# Open Google in a fresh Safari session and grab the edit box by its id.
browser = Safari()
browser.get('http://google.com')
edit_box = browser.find_element_by_id('lst-ib')

# The click is optional, but makes sure the focus is on the edit box.
edit_box.click()
edit_box.send_keys('12.34')
edit_box.get_attribute('value')  # outputs: u'12.34'

# Clear the box and read its value again.
edit_box.click()
edit_box.clear()
edit_box.get_attribute('value')  # outputs: u''

# Type a second value into the now-empty box.
edit_box.send_keys('56.78')
edit_box.get_attribute('value')  # outputs: u'56.78'
# importing selenium libararies from selenium.webdriver import Safari #import selenium libraray to amke keys working through #automation from selenium.webdriver.common.keys import Keys # calling the web driver of safari driver = Safari() #taking user id and passowrd input from the user # you can hard code this if you want to save # this file locallly on your computer user1 = input("enter your user id for moddle :") passw1 = input("enter your password :"******"https://moodle.iitd.ac.in/login/index.php") # finding the user id and password elements on the login page user = driver.find_element_by_id("username") passw = driver.find_element_by_id("password") # entering the user id and pasword in the form user.send_keys(user1) passw.send_keys(passw1) # finding the capthca input in the login page captcha = driver.find_element_by_id("valuepkg3") # making a local copy of the login form so that # captcha can be extracted and solved text1 = driver.find_element_by_id("login").text # figuring out which type of capthca is asked case1 = text1[326:329] # for add or sub case2 = text1[332:335] # for first or second value def checking_the_type_of_captha(case1, case2):
from selenium.webdriver import Safari # importing os module to making folders # your PC import os driver = Safari() #taking input from user for the problem they want to scrape problem = int(input("please enter the problem you want to scrape:")) #creating the folder name with problem os.mkdir("/Users/lakshaydagar/Downloads/%s" % (problem)) # navigating to the codeforces site driver.get("https://codeforces.com/contest/%s" % (problem)) # finding the name of variable of part of problem taga = driver.find_elements_by_tag_name("a") #runnig a loop through all the parts in problem for i in range(27, len(taga) - 9, 4): driver.get("https://codeforces.com/contest/%s" % (problem)) taga = driver.find_elements_by_tag_name("a") # converting the part of problem to a string op = taga[i].get_attribute("innerHTML") o = op[29] # mapping is done such that the 29th element in page # source is first part of problem and folllwoing part follow # a 4 no. differance try: # navigating to specific problem page driver.get("https://codeforces.com/contest/%s/problem/%s" % (problem, o)) # creating a sprate folder for the part of the problem os.mkdir("/Users/lakshaydagar/Downloads/%s/%s" % (problem, o)) # maximizing the window so that screenshot can be taken driver.set_window_size(1500, 1080) # saving the screenshot in the problem folder created earlier