def __init__(self, day):
    """Create a browser session and immediately scrape the agenda for *day*."""
    self.driver = WebDriverWrapper()._driver
    # Explicit waits during the crawl share this 10-second timeout.
    self.wait = WebDriverWait(self.driver, 10)
    # Daily run or backfill ("Diario ou retroativo")
    self.scrape(day)
def get_sgd_price():
    """Scrape the AUD/SGD exchange rate from the RBA statistics page.

    Returns the rate as a string (DynamoDB wants strings); returns '0'
    when the page fails to load within the timeout.
    """
    page_url = 'https://www.rba.gov.au/statistics/frequency/exchange-rates.html'
    element_id = 'SGD'
    timeout = 5  # seconds to wait for the rate element to appear
    rate = 0  # fallback value when scraping fails
    page_source = None

    browser = WebDriverWrapper()
    browser._driver.get(page_url)
    try:
        # Block until the SGD element is present in the DOM.
        WebDriverWait(browser._driver, timeout).until(
            EC.presence_of_element_located((By.ID, element_id)))
    except TimeoutException:
        print('Loading took too long.')
    else:
        page_source = browser._driver.page_source
    finally:
        browser.close()

    if page_source:
        soup = bs4.BeautifulSoup(page_source, "html.parser")
        node = soup.find(attrs={"id": element_id})
        # The rate text sits after the third child of the matched node.
        rate = node.contents[3].next

    return str(rate)  # str for dynamoDB
def get_oil_price():
    """Scrape the current oil contract price from TradingView.

    Returns the price as a string (DynamoDB wants strings); returns '0'
    when the quote never renders within the timeout.
    """
    page_url = 'https://www.tradingview.com/symbols/NYMEX-AV02%21/'
    quote_class = 'tv-symbol-price-quote__value js-symbol-last'
    ready_class = 'tv-symbol-price-quote__value'  # appears once the page JS has loaded
    timeout = 5  # seconds to wait for the quote element
    quote = 0  # fallback value when scraping fails
    page_source = None

    browser = WebDriverWrapper()
    browser._driver.get(page_url)
    try:
        # Wait for the quote container to confirm the JS finished rendering.
        WebDriverWait(browser._driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, ready_class)))
    except TimeoutException:
        print('Loading took too long.')
    else:
        page_source = browser._driver.page_source
    finally:
        browser.close()

    if page_source:
        soup = bs4.BeautifulSoup(page_source, "html.parser")
        node = soup.find("div", class_=quote_class)
        quote = node.text

    return str(quote)  # str for dynamoDB
def get_all_super_markets():
    """Fetch the current Pringles price from five UK supermarkets.

    Returns the get_prices() results as a list, in the order:
    tesco, asda, morrisons, sainsburys, coop.
    """
    driver = WebDriverWrapper()
    driver.get_url('https://www.tesco.com/groceries/en-GB/products/296734865')
    html = driver.print_all
    # Tesco is parsed from the already-fetched HTML; the other shops are
    # given the live driver to fetch their own pages.
    tesco_price = get_prices(
        "tesco",
        "https://www.tesco.com/groceries/en-GB/products/296734865",
        "span", "value", html)
    asda_price = get_prices(
        "asda",
        "https://groceries.asda.com/product/pringles-tube-snacks/pringles-original-sharing-crisps/910003062100",
        "strong", "co-product__price pdp-main-details__price", driver)
    morrisons_price = get_prices(
        "morrisons",
        "https://groceries.morrisons.com/products/pringles-original-372817011",
        "h2", "bop-price__current", driver)
    sainsburys_price = get_prices(
        "sainsburys",
        "https://www.sainsburys.co.uk/gol-ui/product/pringles-original-190g",
        "div", "pd__cost__total undefined", driver)
    coop_price = get_prices(
        "coop",
        "https://www.coop.co.uk/products/pringles-original-200g",
        "p", "coop-c-card__price", driver)
    # BUG FIX: the original read `driver.close` (missing parentheses), which
    # evaluated the bound method without calling it — the browser was never
    # closed and the session leaked.
    driver.close()
    return [tesco_price, asda_price, morrisons_price, sainsburys_price,
            coop_price]
def __init__(self):
    """Initialise the scraper: detect the AWS environment, create the
    browser, and load credentials from SSM."""
    # True when running inside AWS Lambda (env var set by the runtime).
    self.in_aws = os.environ.get("AWS_EXECUTION_ENV") is not None
    # BUG FIX: the original constructed WebDriverWrapper twice and
    # overwrote the first instance, leaking a whole browser process.
    self._driver = WebDriverWrapper(download_location="/tmp")
    self._parameters = SSMParameterStore(prefix="/prod")["crunchtime"]
class AgendaBolsoCrawler:
    """Scrapes the Brazilian president's public agenda for a given day and
    broadcasts the entries to every Telegram chat known to BOT_INSTANCE."""

    def __init__(self, day):
        self.driver = WebDriverWrapper()._driver
        # Shared 10-second explicit-wait helper.
        self.wait = WebDriverWait(self.driver, 10)
        # Daily run or backfill ("Diario ou retroativo")
        self.scrape(day)

    def retroativo(self):
        """Backfill: scrape every day in the fixed range below."""
        start = date(2019, 1, 1)
        end = date(2019, 7, 7)
        delta = end - start
        for i in range(delta.days):
            self.scrape(start + timedelta(days=i))

    def scrape(self, day):
        """Fetch the agenda page for *day* and forward any entries found."""
        items_agenda = []
        day_string = day.strftime("%Y-%m-%d")
        print('Dia: ' + day_string)
        self.driver.get(
            'https://www.gov.br/planalto/pt-br/acompanhe-o-planalto/agenda-do-presidente-da-republica/'
            + day_string)
        time.sleep(5)  # let the page's JS render the agenda items
        compromissos = self.driver.find_elements_by_class_name(
            "item-compromisso")
        try:
            for compromisso in compromissos:
                horario = '*Horário:* ' + compromisso.find_element_by_xpath(
                    ".//time").text + '\n'
                titulo = compromisso.find_element_by_xpath(
                    ".//h4[@class='compromisso-titulo']").text + '\n'
                local = '*Local:* ' + compromisso.find_element_by_xpath(
                    ".//p[@class='compromisso-local']").text + '\n'
                items_agenda.append([horario, titulo, local])
            if items_agenda:
                self.send_to_chats(items_agenda, day_string)
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt. Best-effort behaviour kept.
            print('Not Found')

    def send_to_chats(self, rows, day_string):
        """Format *rows* as Markdown and send the digest to every chat id
        seen in the bot's pending updates."""
        chats = []
        string_rows = '\n'.join(''.join(row) for row in rows)
        string_rows = '*Agenda Bolsonaro ' + day_string + '* \n' + string_rows
        bot_updates = BOT_INSTANCE.getUpdates()
        for update in bot_updates:
            try:
                chats.append(update['message']['chat']['id'])
            except Exception:
                # BUG FIX: was a bare `except:`; updates without a message
                # payload carry no chat id and are skipped.
                print('empity message')
        # deduplicate
        chats = list(set(chats))
        print(chats)
        print(string_rows)
        for chat in chats:
            BOT_INSTANCE.sendMessage(chat, string_rows, parse_mode='markdown')
def scrape_amazon_reviews(config):
    """
    Scrape Amazon reviews for every product URL in config['urls'].

    Returns (driver.results, driver.status). Aborts early if Amazon
    detects the scraping.
    """
    urls = config['urls']
    sleep_range = (1, 3)  # min/max seconds between products, to look human
    driver = WebDriverWrapper()
    try:
        for product_url in urls:
            driver.open_amazon_product(product_url)
            driver.scrape_reviews()
            # sleep a bit between products
            if sleep_range:
                random_sleep(sleep_range)
    except AmazonDetectionException:
        logger.fatal('Amazon detected the scraping. Aborting.')
    finally:
        # BUG FIX: previously the browser was only closed on the happy path
        # or after the detection handler; any other exception leaked it.
        driver.close()
    pprint.pprint(driver.results)
    # NOTE(review): the `-3` offset presumably discards bookkeeping keys in
    # driver.results — confirm against WebDriverWrapper.
    logger.info('Got {} results out from {} urls'.format(
        len(driver.results.keys()) - 3, len(urls)))
    return driver.results, driver.status
def ProcessUrl(url, width, height):
    """Render *url* in a browser sized for the page orientation and return
    the fully loaded page source.

    Waits until the report signals completion through the
    `window.webReportSectionsDone` flag, then logs the session out so the
    next use of this browser instance does not reuse it.
    """
    # NOTE(review): "portrait" for width >= 1000 looks inverted — confirm
    # against DetermineSizeFromOrientation.
    currentOrientation = "landscape"
    if width >= 1000:
        currentOrientation = "portrait"
    width, height = DetermineSizeFromOrientation(currentOrientation)

    # BUG FIX: construct the driver before the try block; previously a
    # failing constructor left `driver` unbound and the finally clause
    # raised NameError.
    driver = WebDriverWrapper(str(width), str(height))
    try:
        driver.get_url(url)
        # BUG FIX: time.sleep() takes seconds, not milliseconds — the
        # original slept for 3000 s (50 minutes). 3 s matches the intent of
        # letting the dashboards start loading.
        time.sleep(3)
        # BUG FIX: the original called WebDriverWait() with no arguments
        # (TypeError) and used the C# spelling `wait.Until(value)`; poll the
        # readiness flag directly with a 32-second deadline instead.
        deadline = time.time() + 32
        isReady = False
        while not isReady:
            try:
                isReady = driver.execute_script(
                    'return window.webReportSectionsDone')
            except Exception:
                # The page may not be loaded yet, so the script can fail
                # before 'webReportSectionsDone' exists; treat as not ready.
                isReady = False
            if not isReady:
                if time.time() > deadline:
                    # If one section times out, the entire report should not
                    # be generated.
                    raise TimeoutException(
                        'webReportSectionsDone never became true')
                time.sleep(0.5)
        # Get the page source (original variable was the typo
        # `outputCDontents`).
        outputContents = driver.page_source
        # Initiate a logout request so the next time we use this instance,
        # we don't reuse the session.
        parsed_uri = urlparse(url)
        result = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
        driver.get_url(result + '#/logout')
        return outputContents
    finally:
        driver.close()
def lambda_handler(*args, **kwargs):
    """Fetch example.com and return the text of its first <h1> element."""
    browser = WebDriverWrapper()
    browser.get_url('http://example.com')
    heading = browser.get_inner_html('(//div//h1)[1]')
    browser.close()
    return heading
def __get_youtube_content_url(self, driver_wrapper: WebDriverWrapper, item: str) -> str:
    """Search YouTube for *item*, click the best-matching result, and
    return the resulting watch-page URL.
    """
    # Drive the real search UI (rather than building a /results URL) so the
    # page state matches a user-initiated search.
    driver_wrapper.web_driver.get(self.YOUTUBE_URL)
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.CSS_SELECTOR,
         "#search-input.ytd-searchbox-spt input")))
    search_text_box = driver_wrapper.web_driver.find_element_by_css_selector(
        "#search-input.ytd-searchbox-spt input")
    driver_wrapper.web_driver.execute_script(self.JS_CLICK_COMMAND, search_text_box)
    search_text_box.send_keys(item)
    search_submit_button = driver_wrapper.web_driver.find_element_by_id(
        "search-icon-legacy")
    driver_wrapper.web_driver.execute_script(self.JS_CLICK_COMMAND, search_submit_button)
    # At least one result title is visible once this wait returns.
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.CSS_SELECTOR, "#video-title")))

    # Consider only the first three results and pick the title that best
    # matches the requested item.
    all_page_items = self.__collect_all_items_from_page(driver_wrapper)
    # BUG FIX: slicing instead of indexing [0], [1], [2] avoids an
    # IndexError when fewer than three results are returned.
    wanted_elements = all_page_items[:3]
    wanted_elements_titles = self.__extract_titles_from_youtube_items(wanted_elements)
    index = youtube_content_downloader_utils.find_correct_item_element_index_by_title(
        wanted_elements_titles, item)  # Matching algorithm
    print(index)
    driver_wrapper.web_driver.execute_script(
        self.JS_CLICK_COMMAND, wanted_elements[index])
    # BUG FIX: the original issued a second click on `wanted_element`, a
    # leftover name from the old code path that is no longer defined
    # (NameError at runtime); the click above already navigates to the
    # chosen result.
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.TAG_NAME, "video")))
    correct_content_url = driver_wrapper.web_driver.current_url
    print(correct_content_url)
    return correct_content_url
def scraper_handler(event, context):
    """Fetch the page at event['url'] and return its HTML source."""
    browser = WebDriverWrapper()
    browser.get_url(event['url'])
    page_source = browser.get_html()
    browser.close()
    return page_source
def __convert_and_download(self, driver_wrapper: WebDriverWrapper, content_url: str, item_format: str) -> None:
    """Feed *content_url* to a web converter site and trigger the download.

    *item_format* is the converter's format-button id: 'mp3' for songs,
    'mp4' for videos.
    """
    # Fall back to the secondary converter when the primary fails to load.
    # BUG FIX: was a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt.
    try:
        driver_wrapper.web_driver.get(self.WEB_CONVERTERS_URL[0])
    except Exception:
        driver_wrapper.web_driver.get(self.WEB_CONVERTERS_URL[1])
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.XPATH, f'//*[@id="{item_format}"]')))
    # mp3 for songs or mp4 for videos
    format_button = driver_wrapper.web_driver.find_element_by_xpath(
        f'//*[@id="{item_format}"]')
    driver_wrapper.web_driver.execute_script(self.JS_CLICK_COMMAND, format_button)
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.XPATH, '//*[@id="input"]')))
    url_content_input_field = driver_wrapper.web_driver.find_element_by_xpath(
        '//*[@id="input"]')
    driver_wrapper.web_driver.execute_script(
        self.JS_CLICK_COMMAND, url_content_input_field)
    url_content_input_field.send_keys(content_url)
    converter_submit_button = driver_wrapper.web_driver.find_element_by_xpath(
        '//*[@id="submit"]')
    driver_wrapper.web_driver.execute_script(
        self.JS_CLICK_COMMAND, converter_submit_button)
    # The download button only becomes visible once the conversion finished.
    driver_wrapper.waiting_timer.until(driver_wrapper.is_visible(
        (driver_wrapper.locate_by.XPATH, '//*[@id="download"]')))
    download_button = driver_wrapper.web_driver.find_element_by_xpath(
        '//*[@id="download"]')
    driver_wrapper.web_driver.execute_script(self.JS_CLICK_COMMAND, download_button)
def _login(self):
    """Log in to the Grubhub restaurant portal with stored credentials."""
    self._driver = WebDriverWrapper(download_location="/tmp")
    driver = self._driver._driver
    driver.implicitly_wait(25)
    driver.set_page_load_timeout(45)
    driver.get(
        "https://restaurant.grubhub.com/financials/deposit-history/1669366/"
    )
    # The login form exposes two unnamed inputs: user first, password second.
    inputs = driver.find_elements_by_xpath("//input")
    inputs[0].send_keys(self._parameters["user"])
    inputs[1].send_keys(self._parameters["password"] + Keys.ENTER)
    sleep(4)  # allow the post-login redirect to finish
def _login(self):
    """Log in to the Postmates partner dashboard with stored credentials."""
    self._driver = WebDriverWrapper(download_location="/tmp")
    driver = self._driver._driver
    driver.implicitly_wait(25)
    driver.set_page_load_timeout(45)
    driver.get("https://partner.postmates.com/dashboard/home/payments")
    email_field = driver.find_element_by_xpath('//input[@name="email"]')
    email_field.send_keys(self._parameters["user"])
    password_field = driver.find_element_by_xpath('//input[@name="password"]')
    password_field.send_keys(self._parameters["password"] + Keys.ENTER)
    sleep(3)  # allow the login to complete
    # Re-open payments: the login flow may have redirected elsewhere.
    driver.get("https://partner.postmates.com/dashboard/home/payments")
    sleep(2)
def _login(self):
    """Log in to the Flexepos web console with stored credentials."""
    self._driver = WebDriverWrapper(download_location="/tmp")
    driver = self._driver._driver
    driver.implicitly_wait(25)
    driver.set_page_load_timeout(45)
    driver.get("https://fms.flexepos.com/FlexeposWeb/")
    username_field = driver.find_element_by_id("login:username")
    username_field.clear()
    username_field.send_keys(self._parameters["user"])
    password_field = driver.find_element_by_id("login:password")
    password_field.clear()
    password_field.send_keys(self._parameters["password"])
    # The submit control carries a JSF-generated name rather than an id.
    driver.find_element_by_name("login:j_id29").click()
def lambda_handler(*args, **kwargs):
    """Open the GTA Homes landing page, then shut the browser down."""
    browser = WebDriverWrapper()
    browser.get_url('https://www.gta-homes.com/')
    browser.close()
def _login(self):
    """Log in to the DoorDash merchant portal with stored credentials."""
    self._driver = WebDriverWrapper(download_location="/tmp")
    driver = self._driver._driver
    driver.implicitly_wait(25)
    driver.set_page_load_timeout(45)
    driver.get(
        "https://merchant-portal.doordash.com/merchant/financials?store_id=631548"
    )
    email_field = driver.find_element_by_xpath(
        '//input[@data-anchor-id="IdentityLoginPageEmailField"]')
    email_field.send_keys(self._parameters["user"])
    password_field = driver.find_element_by_xpath(
        '//input[@data-anchor-id="IdentityLoginPagePasswordField"]')
    password_field.send_keys(self._parameters["password"])
    driver.find_element_by_id("login-submit-button").click()
def lambda_handler(*args, **kwargs):
    """Print the headline titles from Seeking Alpha's market-news feed.

    Returns an empty string (kept for the Lambda contract).
    """
    browser = WebDriverWrapper()
    example_text = ''
    browser.get_url("https://seekingalpha.com/market-news/all")
    for headline in browser.find_elements_by_class_name("title"):
        print(headline.text)
    browser.close()
    return example_text
def lambda_handler(event, context):
    """Render the heatmap HTML referenced by *event* and upload a PNG
    screenshot back to the same S3 bucket."""
    browser = WebDriverWrapper()
    bucket = event['bucket']
    html_key = event['task']['html_key']
    png_key = event['task']['png_key']
    browser.get_url(get_html(bucket, html_key))
    time.sleep(15)  # give the map JS time to finish drawing
    browser.get_screenshot_as_file('/tmp/heatmap.png')
    print(browser._driver.capabilities)
    s3.upload_file('/tmp/heatmap.png', Bucket=bucket, Key=png_key)
    browser.close()
def _login(self):
    """Log in to the Uber Eats restaurant payments dashboard, including
    the stored-PIN second factor."""
    self._driver = WebDriverWrapper(download_location="/tmp")
    driver = self._driver._driver
    driver.implicitly_wait(25)
    driver.set_page_load_timeout(45)
    driver.get(
        "https://restaurant.uber.com/v2/payments?restaurantUUID=8d6b329b-4976-4ef7-8411-3a416614a726"
    )
    driver.find_element_by_id("useridInput").send_keys(
        self._parameters["user"] + Keys.RETURN)
    driver.find_element_by_id("password").send_keys(
        self._parameters["password"] + Keys.RETURN)
    sleep(5)  # wait for the PIN challenge to render
    # Type the stored PIN one digit per input box.
    pin_boxes = driver.find_elements_by_xpath("//input")
    for box, digit in zip(pin_boxes, self._parameters["pin"]):
        box.send_keys(digit)
    driver.find_element_by_xpath("//button").click()
    sleep(10)  # allow the dashboard to finish loading
def lambda_handler(*args, **kwargs):
    """Scrape the current voucher code and its description from the
    Myprotein banner strip.

    Returns a dict with a human-readable 'message'. Raises ValueError when
    the banner no longer matches the expected "DESC | CODE:XYZ" format.
    """
    driver = WebDriverWrapper()
    driver.get_url('https://www.myprotein.com/nutrition/protein.list')
    time.sleep(2)  # let the banner strip render
    try:
        offers_html = driver.get_inner_html(
            "//*[@class='stripBanner']//*[@class='stripBanner_text']//p")
    finally:
        # BUG FIX: previously the browser leaked whenever extraction or
        # parsing raised before the final close() call.
        driver.close()
    print("--------------------------")
    print('Banner element:', offers_html)
    voucher_code_reg = re.search(r"CODE\:(.*)", offers_html)
    voucher_text_reg = re.search(r"(.*)\|", offers_html)
    # BUG FIX: re.search returns None when the banner format changes; the
    # original crashed with AttributeError on .group(). Fail loudly with a
    # clear message instead.
    if voucher_code_reg is None or voucher_text_reg is None:
        raise ValueError(
            'Banner did not match expected "DESC | CODE:XYZ" format')
    print("Code (regex):", voucher_code_reg.group(0))
    voucher_code = voucher_code_reg.group(1).strip()
    print("Code:", voucher_code)
    print("Code desc (regex):", voucher_text_reg.group(0))
    # Strip any HTML tags from the description text.
    voucher_desc = re.sub('<[^<]+?>', ' ', voucher_text_reg.group(1).strip())
    print("Code description:", voucher_desc)
    print("--------------------------")
    return {'message': f"Code: {voucher_code}, Description: {voucher_desc}"}
def lambda_handler(*args, **kwargs):
    """Open amazon.com, print the 'Your Amazon' nav section's HTML, and
    return 0."""
    browser = WebDriverWrapper()
    browser.get_url('https://www.amazon.com/')
    nav_html = browser.get_inner_html('//*[@id="nav-your-amazon"]')
    print("--------------------------")
    print(nav_html)
    print("--------------------------")
    browser.close()
    return 0
def lambda_handler(*args, **kwargs):
    """Print Seeking Alpha market-news titles that mention 'doge'.

    Returns an empty string (kept for the Lambda contract).
    """
    driver = WebDriverWrapper()
    example_text = ''
    sekinAl = "https://seekingalpha.com/market-news/all"
    driver.get(sekinAl)
    elem = driver.find_elements_by_class_name("title")
    for i in elem:
        # BUG FIX: the original read `if 'doge' in i.text print(i.text)`,
        # which is a SyntaxError (missing colon / statement separator).
        if 'doge' in i.text:
            print(i.text)
    driver.close()
    return example_text
def lambda_handler(*args, **kwargs):
    """Search Google for '21 buttons' and print the first result's HTML."""
    browser = WebDriverWrapper()
    browser.get_url('https://www.google.es/')
    browser.set_input_value('//input[@id="lst-ib"]', '21 buttons')
    browser.click('//center//img[@alt="Google"]')
    time.sleep(0.5)  # let the query settle before submitting
    browser.click('//input[@name="btnK"]')
    time.sleep(0.5)  # wait for the results page
    first_result = browser.get_inner_html('(//div[@class="rc"]//a)[1]')
    print("--------------------------")
    print(first_result)
    print("--------------------------")
    browser.close()
def __create_driver(self) -> WebDriverWrapper:
    """Build a fresh wrapped WebDriver from the configured driver path."""
    return WebDriverWrapper(self.__driver_path)
def lambda_handler(*args, **kwargs):
    """Search Google for '21 buttons' and print the first result's HTML."""
    browser = WebDriverWrapper()
    browser.get_url('https://www.google.es/')
    browser.set_input_value('//input[@name="q"]', '21 buttons')
    search_button = browser.find("(//input[@name='btnK'])[2]")
    search_button.send_keys(Keys.TAB)  # shift focus so the click registers
    browser.click('//input[@name="btnK"]')
    first_result = browser.get_inner_html('(//div[@class="rc"]//a)[1]')
    print("--------------------------")
    print(first_result)
    print("--------------------------")
    browser.close()
from multiprocessing import Process, Pipe
from multiprocessing.pool import ThreadPool
import threading
# import pandas as pd

# threadLocal = threading.local()

# Global array
# big = []

# Global response
responses = []

# WebDriver — one shared browser session for the whole module.
browser = WebDriverWrapper()
driver = browser._driver


def s3_handler(full_path, data):
    # Serialise *data* to pretty-printed JSON (default=str stringifies
    # non-serialisable values such as datetimes) and upload it to the
    # market-price bucket under key *full_path*; returns the S3 response.
    s3 = boto3.client('s3')
    bucket = 'freshket-marketprice'
    # csv_buffer = StringIO()
    # df.to_csv(csv_buffer)
    uploadByteStream = json.dumps(data, indent=4, sort_keys=True, default=str)
    # uploadByteStream = bytes(json.dumps(data).encode('UTF-8'))efault).encode('UTF-8')
    # response = s3.put_object(Bucket=bucket, Key=fileName, Body=csv_buffer.getvalue())
    response = s3.put_object(Bucket=bucket, Key=full_path, Body=uploadByteStream)
    return response


# NOTE(review): the body of parsing() is not visible in this chunk.
def parsing(category_name):
def lambda_handler(*args, **kwargs):
    """Log in to the Amazon Associados dashboard (reusing cached cookies
    when possible) and return the month's summary figures as a string.
    """
    # SECURITY(review): credentials are hard-coded further below; move them
    # to a secrets store or environment variables.
    driver = WebDriverWrapper()._driver
    db = firebase.FirebaseApplication(
        'https://dash-associados-default-rtdb.firebaseio.com/', None)
    data = {"username": "******"}
    db.post('/users', data)
    driver.get("https://associados.amazon.com.br")
    try:
        # BUG FIX: the original closed `cookies_file` in a finally block,
        # which raised NameError when open() itself failed (no cookie file
        # yet); a context manager closes it on every path.
        with open("cookies.txt") as cookies_file:
            if os.fstat(cookies_file.fileno()).st_size == 0:
                raise IOError
            for cookie in cookies_file:
                driver.add_cookie(json.loads(cookie))
    except IOError:
        # No usable cookies — perform a full interactive login.
        driver.find_element_by_xpath("//a[@href='/login']").click()
        username = driver.find_element_by_id("ap_email")
        username.clear()
        username.send_keys("*****@*****.**")
        password = driver.find_element_by_id("ap_password")
        password.clear()
        password.send_keys("infOaz19!")
        driver.find_element_by_id("signInSubmit").click()
        # Loop until we land on the dashboard, handling the mobile-approval
        # and CAPTCHA re-login flows as they appear.
        while 'home' not in driver.current_url:
            if 'approval' in driver.current_url:
                print('Aprove o login no celular.')
                fastrack = WebDriverWait(driver, 300).until(
                    ec.visibility_of_element_located(
                        (By.XPATH,
                         "//div[@data-assoc-eid='ac-home-month-summary']")))
            elif 'signin' in driver.current_url:
                captcha_img = driver.find_element_by_xpath(
                    "//img[@alt='CAPTCHA']").get_attribute("src")
                print(captcha_img)
                captcha_input = driver.find_element_by_id("auth-captcha-guess")
                captcha = input("Digite o CAPTCHA e aperte ENTER\n")
                print(f'Usando o captcha "{captcha}"')
                captcha_input.send_keys(captcha)
                password = driver.find_element_by_id("ap_password")
                password.clear()
                password.send_keys("infOaz19")
                driver.find_element_by_id("signInSubmit").click()
        # Persist the fresh session cookies for the next invocation.
        with open("cookies.txt", "w") as cookies_file:
            for cookie in driver.get_cookies():
                cookies_file.write(json.dumps(cookie) + '\n')
    summaries = driver.find_elements_by_xpath(
        "//div[@data-assoc-eid='ac-home-month-summary']//div[contains(@class, 'a-row')]//div[contains(@class, 'a-ws-span-last')]"
    )
    total_sent = summaries[0].text
    total_gains = summaries[1].text
    total_ordered = summaries[2].text
    total_clicks = summaries[3].text
    driver.close()
    # NOTE(review): the first and third labels both read "Produtos pedidos" —
    # confirm whether the first should say something else (e.g. enviados).
    return f'Produtos pedidos: "{total_sent}" - Ganho: "{total_gains}" - Produtos pedidos: "{total_ordered}" - Cliques: "{total_clicks}"'
def lambda_handler(*args, **kwargs):
    """Scrape the ES and OS game pages and push both result sets to S3."""
    es_driver = WebDriverWrapper()
    es_driver.get_url(week_data['es_url'])
    es_driver.get_soup()
    es_data.ProcessGamesToS3(es_driver.soup, week_id)
    es_driver.close()

    os_driver = WebDriverWrapper()
    os_driver.get_url(week_data['os_url'])
    os_driver.get_soup()
    os_data.ProcessGamesToS3(os_driver.soup)
    os_driver.close()
def lambda_handler(*args, **kwargs):
    """Sign in to BackerKit and trigger the export download for a segment.

    The credential and URL placeholder strings must be filled in before use.
    """
    browser = WebDriverWrapper()
    browser.get_url('https://www.backerkit.com/admins/sign_in')
    browser.set_input_value_byName('admin[email]', 'YOUR_EMAIL')
    browser.set_input_value_byName('admin[password]', 'YOUR_PASSWORD')
    browser.click_byName('commit')
    browser.get_url('URL_OF_EXPORT_PAGE_OF_DESIRED_SEGMENT')
    browser.set_input_value('XPATH_OF_FILE', '\n')
    browser.close()
#!/usr/bin/python # -*- coding: utf-8 -*- from webdriver_wrapper import WebDriverWrapper wd = WebDriverWrapper() wd.get_facebook() wd.go_to_page() # db.get_last_commented wd.scan_posts() wd.expand_older_posts() wd.quit_driver()