class AgendaBolsoCrawler:
    """Scrape the president's agenda page for a day and send it to bot chats.

    Instantiating the crawler immediately scrapes ``day``. Call
    :meth:`retroativo` on an instance to walk a fixed range of past days.
    """

    # Base URL of the agenda page; the ISO date (YYYY-MM-DD) is appended.
    AGENDA_URL = ('https://www.gov.br/planalto/pt-br/acompanhe-o-planalto/'
                  'agenda-do-presidente-da-republica/')
    # Seconds to sleep after navigation before reading the page.
    PAGE_LOAD_DELAY = 5

    def __init__(self, day):
        """Create the browser session and scrape ``day`` (a date-like object)."""
        self.driver = WebDriverWrapper()._driver
        self.wait = WebDriverWait(self.driver, 10)
        # Daily mode. For a retroactive backfill call self.retroativo() instead.
        self.scrape(day)

    def retroativo(self):
        """Scrape each day from 2019-01-01 up to, but excluding, 2019-07-07."""
        # Adjust the start and end dates here to change the backfill window.
        start = date(2019, 1, 1)
        end = date(2019, 7, 7)
        for offset in range((end - start).days):
            self.scrape(start + timedelta(days=offset))

    def scrape(self, day):
        """Load the agenda page for ``day`` and forward its entries to the chats.

        Each entry contributes ``[time, title, location]`` strings. An entry
        that lacks one of those elements is reported and skipped, so it no
        longer discards the rest of the day's agenda. Nothing is sent when no
        entry could be read. A failure while sending is reported but not
        raised, keeping the best-effort behaviour of the original code.
        """
        day_string = day.strftime("%Y-%m-%d")
        print('Dia: ' + day_string)
        self.driver.get(self.AGENDA_URL + day_string)
        time.sleep(self.PAGE_LOAD_DELAY)
        compromissos = self.driver.find_elements_by_class_name(
            "item-compromisso")

        items_agenda = []
        for compromisso in compromissos:
            try:
                horario = '*Horário:* ' + compromisso.find_element_by_xpath(
                    ".//time").text + '\n'
                titulo = compromisso.find_element_by_xpath(
                    ".//h4[@class='compromisso-titulo']").text + '\n'
                local = '*Local:* ' + compromisso.find_element_by_xpath(
                    ".//p[@class='compromisso-local']").text + '\n'
            except Exception:
                # Element lookup failed for this entry only; keep the others.
                print('Not Found')
                continue
            items_agenda.append([horario, titulo, local])

        if not items_agenda:
            return
        try:
            self.send_to_chats(items_agenda, day_string)
        except Exception as exc:
            # Best effort: a delivery problem must not abort a backfill run.
            print('Send failed: ' + str(exc))

    def send_to_chats(self, rows, day_string):
        """Send the formatted agenda to every chat found in the bot's updates.

        ``rows`` is an iterable of string sequences; each row is concatenated
        and rows are separated by a blank line. Chat ids are taken from
        ``update['message']['chat']['id']`` of each update returned by
        ``BOT_INSTANCE.getUpdates()``; updates without that path are skipped.
        """
        body = '\n'.join(''.join(row) for row in rows)
        message = '*Agenda Bolsonaro ' + day_string + '* \n' + body

        chats = []
        for update in BOT_INSTANCE.getUpdates():
            try:
                chats.append(update['message']['chat']['id'])
            except (KeyError, TypeError):
                # Update carries no regular message (or it is None).
                print('empity message')

        # Deduplicate while keeping first-seen order so output is deterministic.
        chats = list(dict.fromkeys(chats))
        print(chats)
        print(message)
        for chat in chats:
            BOT_INSTANCE.sendMessage(chat, message, parse_mode='markdown')
def lambda_handler(*args, **kwargs):
    """Print the text of every "title" element mentioning 'doge' on Seeking Alpha.

    Opens the Seeking Alpha market-news page, prints the text of each element
    with class ``title`` whose text contains ``'doge'``, and closes the
    browser even when navigation or lookup fails.

    Returns:
        An empty string; the matches are only printed, not returned.
    """
    # NOTE(review): other code in this file drives the browser through
    # WebDriverWrapper()._driver; confirm the wrapper itself exposes get(),
    # find_elements_by_class_name() and close() as used here.
    driver = WebDriverWrapper()
    example_text = ''
    sekinAl = "https://seekingalpha.com/market-news/all"
    try:
        driver.get(sekinAl)
        elem = driver.find_elements_by_class_name("title")
        for i in elem:
            if 'doge' in i.text:
                print(i.text)
    finally:
        # Release the browser on every path, not just on success.
        driver.close()
    return example_text
def lambda_handler(*args, **kwargs):
    """Log in to Amazon Associados and return the month-summary figures.

    Steps, in order:
      1. Post a record to ``/users`` on the Firebase database.
      2. Open the Associados site and restore the session from
         ``cookies.txt`` (one JSON-encoded cookie per line).
      3. If that file is missing, unreadable or empty, perform an interactive
         login (phone approval and/or CAPTCHA typed on stdin) and write the
         resulting cookies back to ``cookies.txt``.
      4. Read the first four values of the month-summary widget.

    Returns:
        A one-line summary string with shipped items, earnings, ordered items
        and clicks.

    Raises:
        IndexError: if fewer than four summary values are found on the page.
    """
    driver = WebDriverWrapper()._driver
    try:
        db = firebase.FirebaseApplication(
            'https://dash-associados-default-rtdb.firebaseio.com/', None)
        data = {"username": "******"}
        db.post('/users', data)

        driver.get("https://associados.amazon.com.br")
        try:
            # The context manager closes the read handle on every path,
            # including the "empty file" one below.
            with open("cookies.txt") as cookies_file:
                if os.fstat(cookies_file.fileno()).st_size == 0:
                    raise IOError
                for cookie in cookies_file:
                    driver.add_cookie(json.loads(cookie))
            # NOTE(review): the page is not reloaded after the cookies are
            # added; confirm the summary widget is available without a
            # refresh.
        except IOError:
            # No usable saved session: log in from scratch.
            # NOTE(review): credentials are hard-coded, and the password
            # literal used for the CAPTCHA retry below differs from this one.
            # Move them to a secret store and confirm which value is correct.
            driver.find_element_by_xpath("//a[@href='/login']").click()
            username = driver.find_element_by_id("ap_email")
            username.clear()
            username.send_keys("*****@*****.**")
            password = driver.find_element_by_id("ap_password")
            password.clear()
            password.send_keys("infOaz19!")
            driver.find_element_by_id("signInSubmit").click()

            while 'home' not in driver.current_url:
                if 'approval' in driver.current_url:
                    # Login must be approved on the phone; wait up to 300 s
                    # for the summary widget to become visible.
                    print('Aprove o login no celular.')
                    WebDriverWait(driver, 300).until(
                        ec.visibility_of_element_located(
                            (By.XPATH,
                             "//div[@data-assoc-eid='ac-home-month-summary']")))
                elif 'signin' in driver.current_url:
                    # CAPTCHA challenge: show the image URL and read the
                    # answer from stdin, then resubmit the password.
                    captcha_img = driver.find_element_by_xpath(
                        "//img[@alt='CAPTCHA']").get_attribute("src")
                    print(captcha_img)
                    captcha_input = driver.find_element_by_id(
                        "auth-captcha-guess")
                    captcha = input("Digite o CAPTCHA e aperte ENTER\n")
                    print(f'Usando o captcha "{captcha}"')
                    captcha_input.send_keys(captcha)
                    password = driver.find_element_by_id("ap_password")
                    password.clear()
                    password.send_keys("infOaz19")
                    driver.find_element_by_id("signInSubmit").click()

            # Persist the fresh session, one JSON cookie per line.
            with open("cookies.txt", "w") as cookies_file:
                for cookie in driver.get_cookies():
                    cookies_file.write(json.dumps(cookie) + '\n')

        summaries = driver.find_elements_by_xpath(
            "//div[@data-assoc-eid='ac-home-month-summary']//div[contains(@class, 'a-row')]//div[contains(@class, 'a-ws-span-last')]"
        )
        total_sent = summaries[0].text
        total_gains = summaries[1].text
        total_ordered = summaries[2].text
        total_clicks = summaries[3].text
        # The first label previously duplicated "Produtos pedidos"; it now
        # names the shipped-items value it actually carries.
        return f'Produtos enviados: "{total_sent}" - Ganho: "{total_gains}" - Produtos pedidos: "{total_ordered}" - Cliques: "{total_clicks}"'
    finally:
        # Close the browser on success and on any failure above.
        driver.close()