def get_sgd_price():
    """Scrape the SGD entry from the RBA exchange-rates page.

    Opens the page in a ``WebDriverWrapper`` browser, waits up to ``delay``
    seconds for the element with id ``SGD`` to be present, and then parses
    the page source with BeautifulSoup.

    Returns:
        str: The scraped value passed through ``str`` (a string is wanted
        for DynamoDB). ``'0'`` is returned when the wait timed out or the
        expected element could not be located in the parsed HTML.
    """
    url = 'https://www.rba.gov.au/statistics/frequency/exchange-rates.html'
    selector = 'SGD'
    delay = 5  # sec, to wait for item to load
    price = 0  # default value
    html = None

    driver = WebDriverWrapper()
    try:
        # Navigation sits inside the try block so the browser is released
        # even when loading the page itself fails.
        driver._driver.get(url)
        try:
            # wait until element is present in the DOM
            WebDriverWait(driver._driver, delay).until(
                EC.presence_of_element_located((By.ID, selector)))
        except TimeoutException:
            print('Loading took too long.')
        else:
            html = driver._driver.page_source
    finally:
        driver.close()

    if html:
        soup = bs4.BeautifulSoup(html, "html.parser")
        data = soup.find(attrs={"id": selector})
        # Keep the default when the row is missing or shorter than expected
        # instead of failing with AttributeError / IndexError.
        if data is not None and len(data.contents) > 3:
            price = data.contents[3].next
    return str(price)  # str for dynamoDB
def get_oil_price():
    """Scrape the last quoted price from the TradingView NYMEX-AV02! page.

    Opens the page in a ``WebDriverWrapper`` browser, waits up to ``delay``
    seconds for an element with class ``tv-symbol-price-quote__value`` to be
    present (used as the signal that the page's JavaScript has loaded), and
    then parses the page source with BeautifulSoup.

    Returns:
        str: The text of the price element passed through ``str`` (a string
        is wanted for DynamoDB). ``'0'`` is returned when the wait timed out
        or the price element could not be located in the parsed HTML.
    """
    url = 'https://www.tradingview.com/symbols/NYMEX-AV02%21/'
    selector = 'tv-symbol-price-quote__value js-symbol-last'
    # use this to check when javascript on page has loaded
    selector1 = 'tv-symbol-price-quote__value'
    delay = 5  # sec, to wait for item to load
    price = 0  # default value
    html = None

    driver = WebDriverWrapper()
    try:
        # Navigation sits inside the try block so the browser is released
        # even when loading the page itself fails.
        driver._driver.get(url)
        try:
            # wait until element is present in the DOM
            WebDriverWait(driver._driver, delay).until(
                EC.presence_of_element_located((By.CLASS_NAME, selector1)))
        except TimeoutException:
            print('Loading took too long.')
        else:
            html = driver._driver.page_source
    finally:
        driver.close()

    if html:
        soup = bs4.BeautifulSoup(html, "html.parser")
        data = soup.find("div", class_=selector)
        # Keep the default when the element is missing instead of failing
        # with AttributeError on None.
        if data is not None:
            price = data.text
    return str(price)  # str for dynamoDB
def lambda_handler(event, context):
    """Open the target page and click one of four customer links at random.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        dict: ``statusCode`` 200 and a JSON ``body`` whose message names the
        index (0-3) of the link that was clicked.
    """
    # Index in this tuple is the number reported back to the caller.
    customer_ids = ('123', '392', '731', '567')
    number = random.randrange(len(customer_ids))

    driver = WebDriverWrapper()
    try:
        driver.get_url('http://35.236.100.236:8080')
        driver.click(
            "//span[@data-customer='{}']".format(customer_ids[number]))

        print("--------------------------")
        print("Success")
        print("--------------------------")
        time.sleep(1)
    finally:
        # Release the browser even when navigation or the click fails.
        driver.close()

    return {
        'statusCode': 200,
        'body': json.dumps(
            {'message': 'Success - Clicked HotRod link {}'.format(number)})
    }
def scrape_amazon_reviews(config):
    """Scrape Amazon reviews for every product URL in ``config['urls']``.

    Each URL is opened and scraped in turn with a random pause between
    products. If ``AmazonDetectionException`` is raised the loop is aborted
    and whatever was collected so far is still printed and returned.

    Args:
        config: Mapping with a ``'urls'`` entry holding the product URLs.

    Returns:
        tuple: ``(driver.results, driver.status)`` as left on the wrapper.
    """
    urls = config['urls']
    sleep_range = (1, 3)

    driver = WebDriverWrapper()
    try:
        try:
            for product_url in urls:
                driver.open_amazon_product(product_url)
                driver.scrape_reviews()
                # sleep a bit
                random_sleep(sleep_range)
        except AmazonDetectionException:
            logger.fatal('Amazon detected the scraping. Aborting.')

        pprint.pprint(driver.results)
    finally:
        # Release the browser on every exit path, including unexpected errors.
        driver.close()

    # NOTE(review): the "- 3" presumably discounts three non-product keys in
    # driver.results -- confirm against WebDriverWrapper.
    logger.info('Got {} results out from {} urls'.format(
        len(driver.results.keys()) - 3, len(urls)))

    return driver.results, driver.status
def lambda_handler(*args, **kwargs):
    """Load http://example.com and return the first ``//div//h1`` inner HTML.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        Whatever ``driver.get_inner_html`` yields for ``(//div//h1)[1]``.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('http://example.com')
        return driver.get_inner_html('(//div//h1)[1]')
    finally:
        # Release the browser even when navigation or the lookup fails.
        driver.close()
def lambda_handler(*args, **kwargs):
    """Open https://www.gta-homes.com/ in a browser and close it again.

    Nothing is extracted from the page; the handler returns ``None``.

    Args:
        *args: Ignored.
        **kwargs: Ignored.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.gta-homes.com/')
    finally:
        # Release the browser even when navigation fails.
        driver.close()
def scraper_handler(event, context):
    """Fetch the page at ``event['url']`` and return its HTML.

    Args:
        event: Mapping that must contain a ``'url'`` entry.
        context: Lambda context object (not used).

    Returns:
        Whatever ``driver.get_html()`` yields for the loaded page.

    Raises:
        KeyError: If ``event`` has no ``'url'`` entry.
    """
    # Read the input first so a malformed event never starts a browser.
    url = event['url']

    driver = WebDriverWrapper()
    try:
        driver.get_url(url)
        return driver.get_html()
    finally:
        # Release the browser even when navigation fails.
        driver.close()
def _process_page(url, process):
    """Load *url* in a fresh browser and hand its parsed soup to *process*."""
    driver = WebDriverWrapper()
    try:
        driver.get_url(url)
        driver.get_soup()
        # Process while the browser is still open, as the original flow did.
        process(driver.soup)
    finally:
        # Release the browser even when loading or processing fails.
        driver.close()


def lambda_handler(*args, **kwargs):
    """Process the ``es_url`` and ``os_url`` pages of ``week_data`` in turn.

    Each page is loaded in its own browser instance; the resulting soup is
    passed to ``es_data.ProcessGamesToS3`` (together with ``week_id``) and
    ``os_data.ProcessGamesToS3`` respectively.

    Args:
        *args: Ignored.
        **kwargs: Ignored.
    """
    _process_page(
        week_data['es_url'],
        lambda soup: es_data.ProcessGamesToS3(soup, week_id))
    _process_page(week_data['os_url'], os_data.ProcessGamesToS3)
def lambda_handler(*args, **kwargs):
    """Sign in to BackerKit's admin site and poke the export page.

    The e-mail, password, export URL and XPath are placeholder literals that
    have to be replaced before this handler can do anything useful.

    Args:
        *args: Ignored.
        **kwargs: Ignored.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.backerkit.com/admins/sign_in')
        driver.set_input_value_byName('admin[email]', 'YOUR_EMAIL')
        driver.set_input_value_byName('admin[password]', 'YOUR_PASSWORD')
        driver.click_byName('commit')

        driver.get_url('URL_OF_EXPORT_PAGE_OF_DESIRED_SEGMENT')
        # NOTE(review): sending a newline presumably activates the export
        # control at that XPath -- confirm.
        driver.set_input_value('XPATH_OF_FILE', '\n')
    finally:
        # Release the browser even when a step of the flow fails.
        driver.close()
def lambda_handler(*args, **kwargs):
    """Submit a receipt number on the USCIS case-status page.

    The receipt number is read from the ``RECEIPTNUMBER`` environment
    variable, typed into the form, and the form is submitted.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        Whatever ``driver.get_inner_html`` yields for ``(//div//h1)[1]`` on
        the page shown after submitting.
    """
    receiptnumber = os.getenv("RECEIPTNUMBER")

    driver = WebDriverWrapper()
    try:
        driver.get_url('https://egov.uscis.gov/casestatus/landing.do')
        driver.set_input_value(
            '/html/body/div[2]/form/div/div[1]/div/div[1]/fieldset/div[1]/div[4]/input',
            receiptnumber)
        driver.click(
            '/html/body/div[2]/form/div/div[1]/div/div[1]/fieldset/div[2]/div[2]/input'
        )
        return driver.get_inner_html('(//div//h1)[1]')
    finally:
        # Release the browser even when a step of the flow fails.
        driver.close()
def lambda_handler(*args, **kwargs):
    """Print the text of every ``title``-class element on Seeking Alpha news.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        str: Always the empty string.
    """
    example_text = ''
    sekinAl = "https://seekingalpha.com/market-news/all"

    driver = WebDriverWrapper()
    try:
        driver.get_url(sekinAl)
        for element in driver.find_elements_by_class_name("title"):
            print(element.text)
    finally:
        # Release the browser even when loading or the lookup fails.
        driver.close()
    return example_text
def lambda_handler(*args, **kwargs):
    """Search google.es for "21 buttons" and print the first result link.

    Args:
        *args: Ignored.
        **kwargs: Ignored.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.google.es/')

        driver.set_input_value('//input[@name="q"]', '21 buttons')
        button = driver.find("(//input[@name='btnK'])[2]")
        # NOTE(review): the TAB keystroke presumably moves focus so the
        # search button becomes clickable -- confirm.
        button.send_keys(Keys.TAB)
        driver.click('//input[@name="btnK"]')

        first_google_result_title = driver.get_inner_html(
            '(//div[@class="rc"]//a)[1]')

        print("--------------------------")
        print(first_google_result_title)
        print("--------------------------")
    finally:
        # Release the browser even when a step of the flow fails.
        driver.close()
def lambda_handler(*args, **kwargs):
    """Extract the voucher code and its description from the MyProtein banner.

    Loads the protein listing page, reads the strip-banner paragraph and
    pulls two pieces out of its HTML: the text after ``CODE:`` and the text
    before the ``|`` separator (with HTML tags replaced by spaces).

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        dict: ``{'message': 'Code: <code>, Description: <description>'}``.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.myprotein.com/nutrition/protein.list')
        time.sleep(2)
        offers_html = driver.get_inner_html(
            "//*[@class='stripBanner']//*[@class='stripBanner_text']//p")
    finally:
        # The browser is only needed to fetch the banner; release it before
        # parsing so a parse failure cannot leak it.
        driver.close()

    print("--------------------------")
    print('Banner element:', offers_html)

    voucher_code_reg = re.search(r"CODE\:(.*)", offers_html)
    print("Code (regex):", voucher_code_reg.group(0))
    voucher_code = voucher_code_reg.group(1).strip()
    print("Code:", voucher_code)

    voucher_text_reg = re.search(r"(.*)\|", offers_html)
    print("Code desc (regex):", voucher_text_reg.group(0))
    # Replace any HTML tags in the description with spaces.
    voucher_desc = re.sub('<[^<]+?>', ' ', voucher_text_reg.group(1).strip())
    print("Code description:", voucher_desc)
    print("--------------------------")

    return {'message': f"Code: {voucher_code}, Description: {voucher_desc}"}
def ProcessUrl(url, width, height):
    """Load a web report, wait until it signals completion, return its source.

    The page is expected to set ``window.webReportSectionsDone`` once every
    report section has finished loading; this function polls that flag and
    then captures the page source. Afterwards it navigates to the site's
    ``#/logout`` route so a later session does not reuse this one.

    Args:
        url: Address of the report page.
        width: Requested width; only used to choose the orientation
            (``>= 1000`` selects "portrait", otherwise "landscape").
        height: Ignored; the effective size comes from
            ``DetermineSizeFromOrientation``.

    Returns:
        The page source captured once the report reported itself ready.
    """
    # Imported locally: the name the code previously caught
    # (WebDriverTimeoutException) does not exist in Selenium's Python API.
    from selenium.common.exceptions import TimeoutException

    currentOrientation = "portrait" if width >= 1000 else "landscape"
    width, height = DetermineSizeFromOrientation(currentOrientation)

    # Start the web driver outside the try block so the finally clause never
    # refers to an unbound name when construction fails.
    driver = WebDriverWrapper(str(width), str(height))
    try:
        driver.get_url(url)

        # Allow the dashboards to load so the web report sections can
        # increment or decrement their counter. time.sleep takes seconds.
        time.sleep(3)

        # NOTE(review): assumes WebDriverWrapper exposes execute_script and
        # page_source itself -- confirm, otherwise go through driver._driver.
        wait = WebDriverWait(driver, 32)
        is_ready = False
        while not is_ready:
            try:
                wait.until(lambda d: d.execute_script(
                    'return window.webReportSectionsDone'))
            except TimeoutException:
                # At least one section is still loading; re-check the flag
                # below and keep polling.
                # NOTE(review): there is no upper bound on this loop; the
                # cancellation hook of the original design is not ported.
                pass
            is_ready = driver.execute_script(
                'return window.webReportSectionsDone')

        # Get the page source
        output_contents = driver.page_source

        # Initiate a logout request so the next time we use this instance,
        # we don't reuse the session.
        parsed_uri = urlparse(url)
        result = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
        driver.get_url(result + '#/logout')

        return output_contents
    finally:
        driver.close()
def lambda_handler(*args, **kwargs):
    """Search google.es for "21 buttons" and print the first result link.

    Args:
        *args: Ignored.
        **kwargs: Ignored.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.google.es/')

        driver.set_input_value('//input[@id="lst-ib"]', '21 buttons')
        # NOTE(review): clicking the logo presumably dismisses the
        # suggestion dropdown before the search button is pressed -- confirm.
        driver.click('//center//img[@alt="Google"]')
        time.sleep(0.5)

        driver.click('//input[@name="btnK"]')
        time.sleep(0.5)

        first_google_result_title = driver.get_inner_html(
            '(//div[@class="rc"]//a)[1]')

        print("--------------------------")
        print(first_google_result_title)
        print("--------------------------")
    finally:
        # Release the browser even when a step of the flow fails.
        driver.close()
def lambda_handler(event, context):
    """Render an HTML page from S3 and upload a screenshot of it.

    The page location is resolved with ``get_html(bucket, html_key)``, the
    browser is given 15 seconds to render, and the screenshot is written to
    ``/tmp/heatmap.png`` before being uploaded to the same bucket.

    Args:
        event: Mapping with ``'bucket'`` and a ``'task'`` mapping that holds
            ``'html_key'`` and ``'png_key'``.
        context: Lambda context object (not used).

    Raises:
        KeyError: If a required entry is missing from ``event``.
    """
    # Read the inputs first so a malformed event never starts a browser.
    bucket = event['bucket']
    html_key = event['task']['html_key']
    png_key = event['task']['png_key']

    driver = WebDriverWrapper()
    try:
        driver.get_url(get_html(bucket, html_key))
        time.sleep(15)  # give the page time to render
        driver.get_screenshot_as_file('/tmp/heatmap.png')
        print(driver._driver.capabilities)
        s3.upload_file('/tmp/heatmap.png', Bucket=bucket, Key=png_key)
    finally:
        # Release the browser even when rendering or the upload fails.
        driver.close()
def lambda_handler(*args, **kwargs):
    """Print Seeking Alpha ``title`` elements whose text contains 'doge'.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        str: Always the empty string.
    """
    example_text = ''
    sekinAl = "https://seekingalpha.com/market-news/all"

    driver = WebDriverWrapper()
    try:
        # get_url is the navigation method used on WebDriverWrapper by the
        # sibling handlers in this source.
        driver.get_url(sekinAl)
        for element in driver.find_elements_by_class_name("title"):
            if 'doge' in element.text:
                print(element.text)
    finally:
        # Release the browser even when loading or the lookup fails.
        driver.close()
    return example_text
def lambda_handler(*args, **kwargs):
    """Print the inner HTML of Amazon's ``nav-your-amazon`` element.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        int: Always ``0``.
    """
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.amazon.com/')
        html = driver.get_inner_html('//*[@id="nav-your-amazon"]')

        print("--------------------------")
        print(html)
        print("--------------------------")
    finally:
        # Release the browser even when navigation or the lookup fails.
        driver.close()
    return 0
import json
import time

import boto3
import pandas as pd
from selenium.webdriver.common.keys import Keys

from webdriver_wrapper import WebDriverWrapper

if __name__ == "__main__":
    # Fetch the Google home page title and store it as a one-row CSV on S3.
    driver = WebDriverWrapper()
    try:
        driver.get_url('https://www.google.com/')
        page_title = driver.get_page_title()
    finally:
        # Release the browser even when navigation fails.
        driver.close()

    print("--------------------------")
    print(page_title)
    print("--------------------------")

    data = {'page_title': page_title}
    # Wrap the record in a list: a dict of scalar values alone makes
    # pandas raise "If using all scalar values, you must pass an index".
    df = pd.DataFrame([data])
    df.to_csv("s3://freshket-marketprice/test.csv", index=False)
def lambda_handler(*args, **kwargs):
    """Log in to Amazon Associates (BR) and return the monthly summary.

    A user record is first posted to the Firebase database. The browser then
    opens the Associates site and tries to restore a session from
    ``cookies.txt`` (one JSON cookie per line). When that file is missing or
    empty, an interactive login is performed instead (it may ask for phone
    approval or a CAPTCHA typed on stdin) and the resulting cookies are
    written back to ``cookies.txt``.

    Args:
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        str: A one-line summary built from the first four entries of the
        month-summary widget.
    """
    # SECURITY NOTE(review): account credentials are hard-coded below, and
    # the password typed on the CAPTCHA retry differs from the first attempt
    # (no trailing "!") -- confirm which is intended and move both out of
    # the source.
    summary_xpath = "//div[@data-assoc-eid='ac-home-month-summary']"

    driver = WebDriverWrapper()._driver
    try:
        db = firebase.FirebaseApplication(
            'https://dash-associados-default-rtdb.firebaseio.com/', None)
        data = {"username": "******"}
        db.post('/users', data)

        driver.get("https://associados.amazon.com.br")
        try:
            # The with-block closes the handle on every path, including the
            # empty-file case that raises to trigger a fresh login.
            with open("cookies.txt") as cookies_file:
                if os.fstat(cookies_file.fileno()).st_size == 0:
                    raise IOError
                for cookie in cookies_file:
                    driver.add_cookie(json.loads(cookie))
        except IOError:
            driver.find_element_by_xpath("//a[@href='/login']").click()
            username = driver.find_element_by_id("ap_email")
            username.clear()
            username.send_keys("*****@*****.**")
            password = driver.find_element_by_id("ap_password")
            password.clear()
            password.send_keys("infOaz19!")
            driver.find_element_by_id("signInSubmit").click()

            while 'home' not in driver.current_url:
                if 'approval' in driver.current_url:
                    print('Aprove o login no celular.')
                    # Wait up to five minutes for the home summary to show.
                    WebDriverWait(driver, 300).until(
                        ec.visibility_of_element_located(
                            (By.XPATH, summary_xpath)))
                elif 'signin' in driver.current_url:
                    captcha_img = driver.find_element_by_xpath(
                        "//img[@alt='CAPTCHA']").get_attribute("src")
                    print(captcha_img)
                    captcha_input = driver.find_element_by_id(
                        "auth-captcha-guess")
                    captcha = input("Digite o CAPTCHA e aperte ENTER\n")
                    print(f'Usando o captcha "{captcha}"')
                    captcha_input.send_keys(captcha)
                    password = driver.find_element_by_id("ap_password")
                    password.clear()
                    password.send_keys("infOaz19")
                    driver.find_element_by_id("signInSubmit").click()

            # Persist the session so the next run can skip the login.
            with open("cookies.txt", "w") as cookies_file:
                for cookie in driver.get_cookies():
                    cookies_file.write(json.dumps(cookie) + '\n')

        summaries = driver.find_elements_by_xpath(
            "//div[@data-assoc-eid='ac-home-month-summary']//div[contains(@class, 'a-row')]//div[contains(@class, 'a-ws-span-last')]"
        )

        total_sent = summaries[0].text
        total_gains = summaries[1].text
        total_ordered = summaries[2].text
        total_clicks = summaries[3].text

        # NOTE(review): the label "Produtos pedidos" is used for both
        # total_sent and total_ordered -- confirm the intended wording.
        return f'Produtos pedidos: "{total_sent}" - Ganho: "{total_gains}" - Produtos pedidos: "{total_ordered}" - Cliques: "{total_clicks}"'
    finally:
        # Release the browser window on every exit path.
        driver.close()