def getDemons(name):
    """Scrape the Pointercrate stats viewer for the demons of one player.

    Opens the demonlist in Chrome, looks up *name* in the stats viewer and
    collects every demon the player has beaten or verified, on both the
    main and the extended list. The browser is closed before returning.

    Args:
        name: Player name exactly as shown on pointercrate.com.

    Returns:
        list[str]: Demon names extracted from the profile's anchor elements.
    """
    start_chrome('https://pointercrate.com/demonlist/')
    # Scroll so the stats-viewer link is rendered, then open the player page.
    press(END)
    press(PAGE_UP)
    click('Open the stats viewer!')
    write(name, into='Enter to search...')
    click(name)
    time.sleep(1)  # give the profile panel a moment to populate

    # XPath templates for the four sections of a profile:
    # beaten main list (<b>), beaten extended list (<span>),
    # verified main list (<b>), verified extended list (<span>).
    sections = [
        '//*[@id="beaten"]/b[%d]/a',
        '//*[@id="beaten"]/span[%d]/a',
        '//*[@id="verified"]/b[%d]/a',
        '//*[@id="verified"]/span[%d]/a',
    ]
    # Each section's entries are numbered 1, 2, 3, ... with no gaps, so a
    # section is exhausted as soon as one index comes back empty.
    active = [True] * len(sections)
    hits = []
    for counter in range(1, 76):
        if not any(active):
            break  # every section exhausted -- nothing left to probe
        for idx, template in enumerate(sections):
            if not active[idx]:
                continue
            found = find_all(S(template % counter))
            if found:
                hits.append(found)
            else:
                active[idx] = False

    demons = []
    for found in hits:
        text = str(found)
        # The element's string form looks like "[<a href=...>Name</a>]";
        # the demon name sits between the first '>' and the second '<'.
        openers = [m.start() for m in re.finditer("<", text)]
        start = [m.start() for m in re.finditer(">", text)][0]
        demons.append(text[start + 1:openers[1]])

    kill_browser()
    return demons
def get_interactive_page_source(url):
    """Return a BeautifulSoup of the fully rendered page at *url*.

    Launches headless Chrome via helium, clicks every "Veja mais!" button
    so the collapsible summaries are expanded, then parses the rendered
    page source and shuts the browser down.
    """
    # Chrome does the rendering; exit with a hint if it is not installed.
    try:
        start_chrome(url, headless=True)
    except Exception:
        print(
            "Erro: você precisa instalar o Google Chrome e o ChromeDriver par"
            "a executar esse raspador.")
        sys.exit(1)
    driver = get_driver()

    print(
        f"Raspando a página \"{driver.title}\". Isso pode demorar alguns segundos..."
    )
    # One "Veja mais!" click per expandable <span>, with a progress bar.
    expandable = find_all(S("//span[@onClick]"))
    for _ in tqdm(range(len(expandable))):
        click("Veja mais!")
    print('Fim da raspagem da página.')

    # Parse the helium-rendered source before closing Chrome.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    kill_browser()
    return soup
def test_find_by_id(self):
    """An element is found via a CSS '#id' selector."""
    selector = S("#checkBoxId")
    self.assertFindsEltWithId(selector, 'checkBoxId')
def test_find_by_css_selector(self):
    """An element is found via a 'tag.class' CSS selector."""
    selector = S('input.checkBoxClass')
    self.assertFindsEltWithId(selector, 'checkBoxId')
def test_find_by_xpath(self):
    """An element is found via an XPath expression."""
    selector = S("//input[@type='checkbox' and @id='checkBoxId']")
    self.assertFindsEltWithId(selector, 'checkBoxId')
def test_find_by_class(self):
    """An element is found via a CSS '.class' selector."""
    selector = S(".checkBoxClass")
    self.assertFindsEltWithId(selector, 'checkBoxId')
def test_find_by_name(self):
    """An element is found via helium's '@name' selector shorthand."""
    selector = S("@checkBoxName")
    self.assertFindsEltWithId(selector, 'checkBoxId')
""" import json import sys from time import sleep from helium import S, click, kill_browser, start_chrome, wait_until from selenium import webdriver from selenium.common.exceptions import TimeoutException from proj_consts import ProjectConsts opts = webdriver.ChromeOptions() opts.set_capability("loggingPrefs", {"performance": "ALL"}) driver = start_chrome(ProjectConsts.BRAZIL_HEALTH_MINISTRY_URL, options=opts) wait_until(S("ion-button").exists) sleep(3) click("Arquivo CSV") global URL URL = None def process_browser_log_entry(entry): response = json.loads(entry["message"])["message"] return response def fetch_download_url(): global URL