def execSearch(browser: webdriver):
    """Run a smoke-test search on python.org and close the browser window.

    Loads http://www.python.org, checks the page title, submits the query
    "pycon" through the form field named "q" and verifies that the result
    page does not report an empty result set.

    :param browser: webdriver used to drive the page; its current window is
        closed once every check has passed.
    :raises AssertionError: if the title does not contain "Python" or the
        result page contains "No results found.".
    """
    # Open the site under test.
    browser.get("http://www.python.org")
    # The page title must mention Python.
    assert "Python" in browser.title

    # Locate the search input.
    elem = browser.find_element_by_name("q")
    # Replace whatever is in the box with the query and submit it.
    elem.clear()
    elem.send_keys("pycon")
    elem.send_keys(Keys.RETURN)

    # The search must have produced at least one hit.
    assert "No results found." not in browser.page_source

    # close() closes only the current window; quit() would end the session.
    browser.close()
def download_data(browser_object: webdriver,
                  config_object: configparser.ConfigParser) -> BeautifulSoup:
    """Log in to the portal and return the work-order iframe as parsed HTML.

    Reads ``url``, ``username`` and ``password`` from the ``rekyl_portal``
    section of ``config_object``, logs in, picks the option at index 10 of
    the ``maxhits`` drop-down, submits it with RETURN and parses the page
    source of the ``iframe_workorder`` frame.

    :param browser_object: webdriver used for the session; it is closed and
        quit before the function returns.
    :param config_object: configuration holding the ``rekyl_portal`` section.
    :return: ``BeautifulSoup`` tree (``html.parser``) of the iframe content.
    """
    portal = config_object["rekyl_portal"]

    browser_object.get(portal["url"])
    time.sleep(1)

    username = browser_object.find_element_by_id("username")
    # The attribute selector was previously missing its closing bracket.
    password = browser_object.find_element_by_css_selector(
        "input[type=password]")
    username.send_keys(portal["username"])
    password.send_keys(portal["password"])
    browser_object.find_element_by_id("button_login_security_low").click()
    time.sleep(3)

    # Choose the option at index 10 of the hit-count drop-down ...
    show_errands = Select(browser_object.find_element_by_name("maxhits"))
    show_errands.select_by_index(10)
    time.sleep(5)
    # ... and submit it so the list is reloaded.
    browser_object.find_element_by_name("maxhits").send_keys(Keys.RETURN)
    time.sleep(5)

    # page_source reflects the frame the driver is currently switched into.
    iframe = browser_object.find_element_by_id("iframe_workorder")
    browser_object.switch_to.frame(iframe)
    iframe_source = browser_object.page_source
    soup = BeautifulSoup(iframe_source, "html.parser")

    browser_object.close()
    browser_object.quit()
    logging.info("Raw data downloaded")
    return soup
def scrape_classifica(brow: webdriver) -> None:
    """Refresh the ``classifica`` table with the standings shown on the site.

    Opens ``{cfg.BASE_URL}classifica``, empties the ``classifica`` table and
    inserts one record per table row whose ``data-logo`` attribute contains
    ".png". The record values are the texts of the row's cells with the
    first two and last two cells left out. The browser window is closed
    afterwards.

    :param brow: webdriver used to load the page.
    """
    brow.get(f'{cfg.BASE_URL}classifica')
    time.sleep(3)

    dbf.empty_table(table='classifica')

    columns = ['team', 'G', 'V', 'N', 'P', 'Gf', 'Gs', 'Dr', 'Pt', 'Tot']
    rows = brow.find_elements_by_xpath(
        './/table/tbody/tr[contains(@data-logo, ".png")]')

    for row in rows:
        # Bring the row into view before reading its cells.
        scroll_to_element(brow, row)
        cells = row.find_elements_by_xpath('.//td')[2:-2]
        dbf.db_insert(table='classifica',
                      columns=columns,
                      values=[cell.text for cell in cells])

    brow.close()
def driver_to_requests(driver: webdriver) -> requests.sessions.Session:
    """Hand the browser's cookies over to a new ``requests`` session.

    Only the ``name`` and ``value`` of each cookie are copied. The current
    browser window is closed as soon as the cookies have been read.

    :param driver: webdriver whose cookies are exported.
    :return: a fresh ``requests.Session`` carrying those cookies.
    """
    browser_cookies = driver.get_cookies()
    driver.close()

    http_session = requests.Session()
    jar = http_session.cookies
    for entry in browser_cookies:
        jar.set(entry["name"], entry["value"])
    return http_session
def get_car(driver: webdriver, i: int) -> None:
    """Open the listing in row ``i``, scrape it and return to the list.

    Waits up to 10 seconds for the link in row ``i`` of ``#tbl_list_p`` to
    become clickable, clicks it and switches to the second window handle.
    The description is the concatenated outer HTML of three page fragments,
    passed through ``rm_new_line`` and encoded as UTF-8 bytes. The images
    returned by ``get_img`` are downloaded as a side effect.

    The detail tab is closed and focus returns to the first handle even when
    scraping fails, so a broken listing no longer leaves the driver on the
    wrong tab for the next call.

    :param driver: webdriver showing the result list.
    :param i: row position used in the XPath ``tr[i]``.
    :return: the scraped record as a dict, or ``None`` when any step failed
        (the reason is printed). NOTE(review): the ``-> None`` annotation
        does not reflect this; it is left unchanged for callers.
    """
    try:
        link = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (By.XPATH,
                 '//*[@id="tbl_list_p"]/tbody/tr[' + str(i) + ']/td[3]/a')))
        link.click()

        tabs = driver.window_handles
        driver.switch_to.window(tabs[1])
        try:
            description = driver.find_element_by_css_selector(
                'div.search-v>table.tableV').get_attribute('outerHTML')
            for xpath in ('//*[@id="container"]/div[2]/div[3]/h2[2]',
                          '//*[@id="container"]/div[2]/div[3]/table[2]'):
                description += driver.find_element_by_xpath(
                    xpath).get_attribute('outerHTML')

            car: Dict[str, object] = {
                'description': rm_new_line(description).encode("utf-8"),
                'images': get_img_str(driver),
                'count': '-1',
                'activation': '1',
                'currency': 'USD',
            }
            download_images(get_img(driver))
            return car
        finally:
            # Always leave the detail tab, whether scraping worked or not.
            driver.close()
            driver.switch_to.window(tabs[0])
    except Exception as e:
        print('Can\'t find element. Reason: %s' % e)
        return None
def handle_room_tag(driver: webdriver, comm: str):
    """Visit every room on the current page, then move on to the next page.

    Original note: enter each live stream, insert the advertisement and
    write a log entry. The per-room work is delegated to
    ``_into_room_handle``. When the next-page element cannot be found the
    function prints "finish" and closes the browser window.

    :param driver: webdriver showing the room list.
    :param comm: text forwarded to ``_into_room_handle`` for every room.
    """
    # Run the page-end script, then collect every room tag on the page.
    driver.execute_script(Order.page_end.value)
    rooms = driver.find_elements_by_class_name(Order.room_tag.value)

    for index in range(len(rooms)):
        try:
            _into_room_handle(driver, comm, index)
        except ElementClickInterceptedException:
            # Something covered the room title: move down and retry once.
            move_down(driver)
            _into_room_handle(driver, comm, index)

    try:
        move_down(driver)
        next_page = driver.find_element_by_css_selector(Order.page_down.value)
        if not next_page:
            return
        time.sleep(1)
        next_page.click()
        driver.implicitly_wait(5)
        # The recursive call stays inside the try so that a
        # NoSuchElementException from a later page ends the run here too.
        handle_room_tag(driver, comm)
    except NoSuchElementException:
        print("finish")
        driver.close()
        return
def wikiloc_collect(driver: webdriver, url: str, cims_list: List[Cim]):
    """Entry point for collecting wikiloc data through the browser.

    Builds a ``WikiLoc`` for ``url``, lets it collect data for ``cims_list``
    with ``driver``, passes the result to ``add_to_database``, prints the
    ``qsize()`` of what that returns and closes the browser window.

    :param driver: webdriver used for the collection.
    :param url: address handed to ``WikiLoc``.
    :param cims_list: items forwarded to ``WikiLoc.collect``.
    """
    collected = WikiLoc(url).collect(driver, cims_list)
    stored = add_to_database(collected)
    print(stored.qsize())

    driver.close()
    print("Browser closed")
def __init__(self, driver: webdriver):
    """Open the saucedemo login page and locate its form controls.

    Sets a 10 second implicit wait, loads https://www.saucedemo.com/ and
    stores the user-name field, the password field and the login button on
    the instance.

    :param driver: webdriver used by this page object.
    :raises Exception: whatever the page load or an element lookup raised.
        The browser window is closed first and the error is re-raised, so a
        half-initialised object is never handed to the caller.
    """
    self.driver = driver
    try:
        self.driver.implicitly_wait(10)  # seconds
        self.driver.get('https://www.saucedemo.com/')
        self.login_field = self.driver.find_element_by_id('user-name')
        self.password_field = self.driver.find_element_by_id('password')
        self.login_btn = self.driver.find_element_by_css_selector(
            '.btn_action')
    except Exception:
        driver.close()
        # Swallowing the error here would surface later as an
        # AttributeError on the missing fields.
        raise
def _into_room_handle(driver: webdriver, comm: str, current_num: int):
    """Open one room in its own window, send the comment and come back.

    :param driver: webdriver showing the room list.
    :param comm: text passed on to ``send_comments``.
    :param current_num: zero-based room index; the selector template in
        ``Order.room_title`` is formatted with ``current_num + 1``.
    """
    driver.implicitly_wait(5)

    selector = Order.room_title.value.format(current_num + 1)
    room = driver.find_element_by_css_selector(selector)
    title = room.text
    room.click()

    # After the click the room is expected under the second window handle.
    handles = driver.window_handles
    driver.switch_to.window(handles[1])
    send_comments(driver, title, comm)
    driver.close()

    # Re-read the handles once the room window is gone.
    handles = driver.window_handles
    driver.switch_to.window(handles[0])
def close_firefox(driver: webdriver,
                  restart: bool = False,
                  lastp: bool = False,
                  no_ads_found: bool = False,
                  time_exc: bool = False) -> None:
    """Shut Firefox down for the first reason flag that is set.

    The flags are checked in the order ``restart``, ``time_exc``, ``lastp``,
    ``no_ads_found``. The matching message is printed, then the driver is
    closed and quit. For ``lastp`` and ``no_ads_found`` the database is
    disconnected as well. With no flag set the function does nothing.

    :param driver: webdriver instance to shut down.
    :param restart: a restart of Firefox was requested.
    :param lastp: the last page of the listing was reached.
    :param no_ads_found: the page contained no ads.
    :param time_exc: the listing URL did not load in time.
    """
    if restart:
        notice = ('Restart of Firefox requested.\n'
                  'Closing Firefox...\n')
        release_db = False
    elif time_exc:
        notice = ('Failed to load the listing URL in time.\n'
                  'Closing Firefox and retrying...\n')
        release_db = False
    elif lastp:
        notice = (
            'This was the last page that was set or found in the first listing URL.\n'
            'Closing Firefox...\n')
        release_db = True
    elif no_ads_found:
        notice = ('No ads found on the page. Breaking out of the loop.\n'
                  'Closing Firefox...\n')
        release_db = True
    else:
        return

    print(notice)
    driver.close()
    driver.quit()
    if release_db:
        disconnect_db()
def login(user: User, enel_session: EnelMedSession, driver: webdriver) -> int:
    """Post the user's credentials using cookies taken from the browser.

    Loads ``LOG_IN_URL`` in the browser, reads its cookies, closes the
    window, stores the cookies as the ``cookie`` header of ``enel_session``
    and posts the login form. The response body is printed.

    NOTE(review): the signature promises ``int`` but nothing is returned;
    confirm what callers expect.

    :param user: supplies ``username`` and ``password``.
    :param enel_session: holds the ``headers`` dict and the HTTP ``session``.
    :param driver: webdriver used only to obtain the cookies.
    """
    driver.get(LOG_IN_URL)
    browser_cookies = driver.get_cookies()
    driver.close()

    enel_session.headers['cookie'] = create_cookies_header(browser_cookies)

    credentials = {
        "Login": user.username,
        "Password": user.password,
        "IsAcceptedRule": "true"
    }
    reply = enel_session.session.post(LOG_IN_URL,
                                      headers=enel_session.headers,
                                      data=credentials)
    print(reply.text)
def there_and_back_again(driver: webdriver, element):
    """Click ``element``, close the window it opens and return to the start.

    Waits up to 5 seconds for exactly two windows to exist after the click,
    closes every window other than the one the driver started on, printing
    the remaining handles after each close, and switches back.

    :param driver: webdriver in use.
    :param element: element whose click opens the extra window.
    """
    origin = driver.current_window_handle
    element.click()
    WebDriverWait(driver, 5).until(EC.number_of_windows_to_be(2))

    for handle in driver.window_handles:
        if handle == origin:
            continue
        driver.switch_to.window(handle)
        driver.close()
        print('1: ', driver.window_handles)

    driver.switch_to.window(origin)
def make_reservation(self, sportjaDriver: webdriver):
    """Reserve a "Weightlifting" class, or join its waiting list.

    Stores the driver on the instance, calls ``_check_if_next_week``, clicks
    the "Weightlifting" entries (all but the first one found) and then acts
    on whichever button is present: reserve, waiting list or cancel. The
    browser window is closed in every outcome.

    :param sportjaDriver: webdriver showing the class schedule.
    :return: 0 when the class was reserved, the waiting list was joined or a
        reservation already exists (cancel button present); 1 when none of
        the three buttons was found.
    """
    self.sportjaDriver = sportjaDriver
    _check_if_next_week(self)
    # Collect every schedule entry labelled "Weightlifting".
    classnames = sportjaDriver.find_elements_by_class_name("classname")
    weightList = []
    for classname in classnames:
        if classname.text == "Weightlifting":
            weightList.append(classname)
    # The first match is skipped; each remaining entry is clicked.
    # NOTE(review): the source layout was flattened; the button handling
    # below is assumed to run once, after this loop - confirm.
    for elem in weightList[1:]:
        elem.click()
        sleep(1)
    reserveButton = None
    waitingListButton = None
    cancelButton = None
    # Probe for each button separately; a missing one is only reported.
    try:
        cancelButton = sportjaDriver.find_element_by_css_selector(
            "a.grey_btn_small:nth-child(2) > span:nth-child(1)")
    except NoSuchElementException:
        print("No cancelButton found.")
    try:
        reserveButton = sportjaDriver.find_element_by_css_selector(
            "#book_btn > span:nth-child(1)")
    except NoSuchElementException:
        print("No reserveButton found.")
    try:
        waitingListButton = sportjaDriver.find_element_by_css_selector(
            "#join_waiting_list_btn")
    except NoSuchElementException:
        print("No waitingListButton found.")
    # Priority: reserve, then waiting list, then "already reserved".
    if reserveButton:
        reserveButton.click()
        sportjaDriver.close()
        print('Class reserved for next Saturday!')
        return 0
    elif waitingListButton:
        waitingListButton.click()
        sportjaDriver.close()
        print('Put on waiting list, check your mailbox')
        return 0
    elif cancelButton:
        # A cancel button is treated as an existing reservation.
        sportjaDriver.close()
        print('Already reserved')
        return 0
    else:
        print('Can\'t reserve or put on waiting list')
        sportjaDriver.close()
        return 1
def gsc_work_wos_citations(browser: webdriver, work: Work) -> None:
    """Fill in the Web of Science citation data of ``work``.

    Requests the result page for ``work['gsc_title']`` and walks the result
    entries. The first entry whose normalised title is at least 90% similar
    to the profile title (``SequenceMatcher`` ratio) supplies
    ``work['wos_citations_count']`` and ``work['wos_citations_url']``. Both
    are ``None`` when that entry has no Web of Science link. Every
    non-matching entry is reported through ``logging_collector``. The
    current tab is then closed and the first window handle is selected.

    :param browser: webdriver holding the search tab.
    :param work: record read for ``gsc_title`` and updated in place.
    """
    soup = work_wos_citations_request(
        browser, work['gsc_title'])  # send request and get the page source

    if soup.find('div', id='gs_res_ccl_mid') and soup.find(
            'div', class_='gs_r gs_or gs_scl'):
        profile_title = work['gsc_title'].lower()

        for result in soup.find_all('div', class_='gs_r gs_or gs_scl'):
            h3 = result.find('h3', class_="gs_rt")
            if h3 is None:
                # No title heading: skip instead of failing on None, which
                # would also leave the tab open.
                continue
            if h3.find("a"):
                h3 = h3.a

            # Collapse whitespace, drop bracketed markers, lower-case.
            search_title = sub(r"\s+", ' ', h3.text)
            search_title = sub(r"\[.*\]", '', search_title).strip().lower()

            similarity = SequenceMatcher(None, profile_title,
                                         search_title).ratio()
            if similarity >= 0.9:
                wos = result.find('a', class_='gs_nta gs_nph')
                if wos is None:
                    work['wos_citations_count'] = None
                    work['wos_citations_url'] = None
                else:
                    work['wos_citations_count'] = wos.string.replace(
                        'Web of Science:', '').strip()
                    work['wos_citations_url'] = wos['href']
                print("WOS: ", work['wos_citations_count'], sep=" ")
                break

            logging_collector(
                "INFO",
                "TITLE MISMATCH",
                [
                    profile_title,  # title in profile
                    search_title,  # title in search
                    similarity,  # similarity ratio
                ])

    sleep(0.5)
    browser.close()  # close the tab
    sleep(0.5)
    browser.switch_to.window(
        browser.window_handles[0])  # return to the main tab
def click_next_page(self, driver: webdriver, fnav, snav) -> None:
    """Walk result pages ``fnav`` to ``snav - 1`` and scrape each one.

    For every page number the page's ``fnSearch`` script is run, the
    function waits up to 5 seconds for a table row to become visible, prints
    the page number and row count and calls ``self.get_car(driver)``. The
    browser window is closed after the last page. Any failure is printed
    together with the page number that was being processed.

    :param driver: webdriver showing the result list.
    :param fnav: first page number (inclusive).
    :param snav: end page number (exclusive).
    """
    rows_xpath = '/html/body/div[1]/div[4]/div[2]/div[5]/table/tbody/tr'
    # Bound up front so the error message works for an empty range too.
    nav = fnav
    try:
        for nav in range(fnav, snav):
            driver.execute_script("fnSearch(" + str(nav) +
                                  ");return false; ")
            WebDriverWait(driver, 5).until(
                EC.visibility_of_element_located((By.XPATH, rows_xpath)))
            print("Page " + str(nav))
            print("Cars len " +
                  str(len(driver.find_elements_by_xpath(rows_xpath))))
            self.get_car(driver)
        driver.close()
    except Exception as e:
        print('Failed to Click Page Navigator %s. Reason: %s' % (nav, e))
def update_database(brow: webdriver, bets_to_update: list):
    """Update stored bets and predictions from the bets shown on the site.

    The web bets are filtered by colour and by date. For every bet with a
    status its detail table is opened and matched against
    ``bets_to_update``. When a bet id is found, the ``predictions`` rows
    (quote, result, label) and the ``bets`` row (prize, result) are updated.
    The detail window is closed and the original window re-selected after
    each bet.

    :param brow: webdriver showing the bet list.
    :param bets_to_update: database bets the web bets are matched against.
    """
    bets_list = filter_by_color(brow)
    bets_list = filter_by_date(web_bets=bets_list, db_bets=bets_to_update)

    for bet in bets_list:
        status = get_bet_status(bet=bet)
        if not status:
            continue

        main_window = brow.current_window_handle
        _, brow = open_details(brow=brow, bet=bet)

        path = './/table[@class="bet-detail"]'
        wait_visible(brow, path)
        details = brow.find_element_by_xpath(path)

        bet_id, preds = cross_check_teams(table=details,
                                          bets_db=bets_to_update)
        if bet_id:
            for tm1, tm2, quote, result, label in preds:
                # NOTE(review): the WHERE clause is assembled from scraped
                # team names; prefer bound parameters if dbf supports them.
                dbf.db_update(table='predictions',
                              columns=['quote', 'result', 'label'],
                              values=[quote, result, label],
                              where=(f'bet_id = {bet_id} AND ' +
                                     f'team1 = "{tm1}" AND team2 = "{tm2}"'))

            prize = get_prize(brow=brow)
            dbf.db_update(table='bets',
                          columns=['prize', 'result'],
                          values=[prize, status],
                          where=f'id = {bet_id}')

        # Leave the detail window whether or not the bet was matched.
        # switch_to.window replaces the removed switch_to_window().
        brow.close()
        brow.switch_to.window(main_window)
def get_car_data(self, driver: webdriver, i: int) -> None:
    """Scrape one auction lot from the result list and write it to the CSV.

    Clicks the link in row ``i`` of the result table, switches to the second
    window handle and downloads the lot images. It then collects title,
    price, the translated description sections and the technical fields,
    closes the detail window, switches back to the first handle and passes
    the record to ``self.write_csv``.

    Every lookup is best effort. On failure the reason is printed, the page
    is refreshed and a default is used: empty string for text, ``0`` for
    numeric fields, "Дизель" for the fuel. A single missing element
    therefore no longer aborts the record with a ``NameError``.

    NOTE(review): the source layout was flattened; ``driver.refresh()`` is
    assumed to belong to the failure handlers - confirm.

    :param driver: webdriver showing the result list.
    :param i: row position used in the XPath ``tr[i]``.
    :return: ``False`` when ``self.alert_present(driver)`` is true after the
        click attempt, otherwise ``None``.
    """
    frame = inspect.currentframe()
    chunk_size = 4999  # long descriptions are translated in slices this long

    popup = 'body > div.page-popup.exhibited-vehicle'
    protocol_table = (popup + ' > div.vehicle-detail > div'
                      ' > div.vehicle-detail > div > div > div > table')
    spec_table = (popup + ' > div.vehicle-detail > div'
                  ' > div.vehicle-detail > div > table')
    detail_view = popup + ' > div.vehicle-detail > div.vehicle-detail-view'

    def cell_text(table, row, col):
        """Return the text of the cell at (row, col) of the given table."""
        return driver.find_element_by_css_selector(
            '%s > tbody > tr:nth-child(%d) > td:nth-child(%d)' %
            (table, row, col)).text

    def digits(text):
        """Return all digits found in text as one integer."""
        return int(''.join(re.findall(r"\d+", text)))

    def scrape(label, getter, default):
        """Return getter(), or default after reporting the failure."""
        try:
            return getter()
        except Exception as e:
            print('Can\'t get car %s. Reason %s' % (label, e))
            driver.refresh()
            return default

    def add_section(heading, selector, label, chunked=False):
        """Append heading plus the translated HTML of selector."""
        car['description'] = car.get('description', '') + heading
        try:
            el = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            html = el.get_attribute("outerHTML")
            if chunked and len(html) > 5000:
                # Consecutive slices. The old slice end `i + 1 * 4999`
                # dropped nearly everything after the first slice.
                pieces = [
                    html[start:start + chunk_size]
                    for start in range(0, len(html), chunk_size)
                ]
            else:
                pieces = [html]
            for piece in pieces:
                car['description'] += self.rm_new_line(
                    self.ko_translate(piece, "ru"))
        except Exception as e:
            print(frame.f_lineno)
            print('Can\'t get %s. Reason %s' % (label, e))
            driver.refresh()

    try:
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(
                (By.XPATH,
                 '/html/body/div[1]/div[4]/div[2]/div[5]/table/tbody/tr[' +
                 str(i) + ']/td[5]/p/a[@class="a_list"]'))).click()
    except Exception as e:
        print('Can\'t click on car %d. Reason %s' % (i, e))
        driver.refresh()
    if self.alert_present(driver):
        return False

    tabs = driver.window_handles
    try:
        driver.switch_to.window(tabs[1])
    except Exception as e:
        print('Can\'t switch to new window. Reason %s' % e)

    self.download_images(self.get_img(driver))

    car: Dict[str, object] = {}
    car['category'] = self.category
    car['category_url'] = self.category_url

    # Title and SEO title are the same translated heading; translate once
    # and always set both keys.
    try:
        title = self.ko_translate(
            driver.find_element_by_xpath('/html/body/div[1]/div[1]/h2').text,
            "en")
    except Exception:
        title = ''
        driver.refresh()
    car['title'] = title
    car['title-seo'] = title

    try:
        car['price'] = int(
            driver.find_element_by_xpath(
                '/html/body/div[1]/div[2]/div[2]/p/strong/em').text.replace(
                    ',', '')) * 9.10
    except Exception:
        car['price'] = ''
        driver.refresh()

    add_section(
        '<div class="paper-tit"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Протокол осмотра / осмотра автомобилей, выставленных на аукцион</font></font></div>',
        protocol_table,
        'protocol view',
        chunked=True)
    add_section(
        '<h2 class="page-subtit mt60"><font style="vertical-align: inherit"><font style="vertical-align: inherit">Детали автомобиля</font></font></h2>',
        detail_view +
        ' > div.vehicle-detail > div.vehicle-detail_bar > table.tbl-v02',
        'car details')
    add_section(
        '<h2 class="page-subtit mt60" id="view-status"><font style="vertical-align: inherit"><font style="vertical-align: inherit">Состояние кузова автомобиля</font></font></h2>',
        detail_view + ' > div.tab-status', 'car condition')
    add_section(
        '<h2 class="page-subtit mt60"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Видео автомобиля</font></font></h2>',
        detail_view + ' > div:nth-child(5)', 'car control list')

    fuel_names = {
        "가솔린": "Бензин",
        "디젤": "Дизель",
        "LPG": "LPG",
        "LPI하이브리드": "LPG гибрид",
        "가솔린하이브리드": "Бензиновый гибрид",
        "디젤하이브리드": "Дизельный гибрид",
        "전기": "Электрокар",
        "가솔린/LPG": "Бензин/LPG"
    }

    year = scrape('year', lambda: int(cell_text(protocol_table, 5, 4)), 0)
    mark = scrape(
        'mark',
        lambda: self.ko_translate(cell_text(protocol_table, 4, 5), "en"), '')
    color = scrape(
        'color', lambda: self.ko_translate(cell_text(spec_table, 3, 4), "ru"),
        '')
    fuel = scrape('fuel', lambda: fuel_names[cell_text(spec_table, 4, 4)],
                  "Дизель")
    displacement = scrape('displacement',
                          lambda: digits(cell_text(spec_table, 5, 4)), 0)
    # The earlier `text == "자동" if "Автомат" else "Механика"` always
    # produced a bool because the condition was the constant "Автомат".
    transmission = scrape(
        'transmission', lambda: "Автомат"
        if cell_text(spec_table, 2, 4) == "자동" else "Механика", '')
    car_type = scrape(
        'type', lambda: self.ko_translate(cell_text(spec_table, 6, 2), "ru"),
        '')
    lot_number = scrape(
        'lot number', lambda: self.ko_translate(
            driver.find_element_by_css_selector(
                popup + ' > div.vehicle-tit > p > strong').text, "ru"), '')
    distance_driven = scrape('distance driven',
                             lambda: digits(cell_text(spec_table, 1, 4)), 0)

    car['images'] = self.get_img_str(driver)
    car['count'] = '0'
    car['activation'] = '1'
    car['currency'] = 'USD'
    car['recomended'] = '0'
    car['new'] = '0'
    car['weight'] = '0'
    car['article'] = lot_number
    car['properties'] = (
        'Цвет=[type=assortmentCheckBox value=%s product_margin=Желтый|Белый|Серебро|Красный|Фиолетовый|Оранжевый|Зеленый|Серый|Золото|Коричневый|Голубой|Черный|Бежевый]&Кузов=[type=assortmentCheckBox value=%s product_margin=Универсал|Фургон|Фура|Трактор|Седан|Родстер|Пикап|Мотоцикл|Минивен|Хэтчбек|Кроссовер|Купе|Кабриолет|Багги]&Пробег=%d&Двигатель=%d&Год=%d&Трансмиссия=[type=assortmentCheckBox value=%s product_margin=Механика|Автомат]&Топливо=[type=assortmentCheckBox value=%s product_margin=Дизель|Бензин|Газ]&Модель=%s&Марка=%s&Номер лота=%s&Аукцион=lotteautoauction'
        % (color, car_type, distance_driven, displacement, year, transmission,
           fuel, mark, self.category, lot_number))

    try:
        driver.close()
        driver.switch_to.window(tabs[0])
    except Exception as e:
        print('Can\'t switch to old window. Reason %s' % e)

    self.write_csv(car)
def closeBrowser(wedriver: webdriver):
    """Close the window the given webdriver is currently focused on.

    :param wedriver: webdriver whose current window is closed.
    """
    close_window = wedriver.close
    close_window()
def fetch_image_urls(self,
                     query: str,
                     wd: webdriver,
                     sleep_between_interactions: int = 1,
                     max_timeout=5,
                     imgs_offset=5):
    """Collect image URLs for ``query`` from a Google image search.

    Scrolls the result page and walks the ``div`` elements of the first
    ``islrc`` container inside ``#islrg``. Each link found is recorded as a
    reference URL. Thumbnails of at least 60x60 are clicked, the link target
    is opened in a second window and the ``src`` of the first image of at
    least 60x60 on that page is stored. The loop ends once
    ``self.imgs2download + imgs_offset`` URLs have been gathered.

    NOTE(review): the source layout was flattened; the nesting below is a
    reconstruction - confirm against the original file.

    :param query: search term inserted into the search URL.
    :param wd: webdriver used for the search.
    :param sleep_between_interactions: seconds to sleep after scrolling,
        clicking and loading a page.
    :param max_timeout: seconds the explicit waits may take.
    :param imgs_offset: extra URLs to collect beyond ``self.imgs2download``.
    :return: tuple ``(image_urls, reference_imgs_urls)``.
    """
    min_side = 60  # images narrower or shorter than this are ignored
    wanted = self.imgs2download + imgs_offset

    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    def is_large_enough(img):
        """Check both dimensions; height used to be read from "width"."""
        width = int(img.get_attribute("width"))
        height = int(img.get_attribute("height"))
        return width >= min_side and height >= min_side

    def close_extra_windows():
        """Close every window except the first and refocus the first.

        Each extra window is selected before it is closed, so the main
        window cannot be closed by accident.
        """
        extra = wd.window_handles[1:]
        for handle in extra:
            wd.switch_to.window(handle)
            wd.close()
        if extra:
            wd.switch_to.window(wd.window_handles[0])

    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = list()
    reference_imgs_urls = list()
    wait = WebDriverWait(wd, max_timeout)
    results_start = 0

    while len(image_urls) < wanted:
        scroll_to_end(wd)

        # get all image thumbnail results
        wait.until(EC.presence_of_element_located((By.ID, "islrg")))
        thumbnail_div = wd.find_element_by_id('islrg')
        WebDriverWait(thumbnail_div, max_timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, "islrc")))
        thumbnail_div = thumbnail_div.find_elements_by_class_name('islrc')[0]
        WebDriverWait(thumbnail_div, max_timeout).until(
            EC.presence_of_element_located((By.TAG_NAME, "div")))
        div_with_link_img = thumbnail_div.find_elements_by_tag_name('div')
        n_found_divs = len(div_with_link_img)

        for div_of_img in div_with_link_img[results_start:n_found_divs]:
            try:
                if len(image_urls) >= wanted:
                    break
                wd.switch_to.window(wd.window_handles[0])

                for pos_link in div_of_img.find_elements_by_tag_name('a'):
                    possible_imgs = pos_link.find_elements_by_tag_name('img')
                    p_links = pos_link.get_attribute("href")
                    print("PL: ", p_links)
                    if p_links is not None:
                        reference_imgs_urls.append(p_links)

                    # Click the thumbnails so the link appears. Large ones
                    # go into a new list; removing from possible_imgs while
                    # iterating it skipped elements.
                    large_imgs = []
                    for img in possible_imgs:
                        if not is_large_enough(img):
                            continue
                        large_imgs.append(img)
                        img.click()
                        # wait until the click has had effect
                        time.sleep(sleep_between_interactions)

                    if not large_imgs:
                        continue
                    new_img_url = pos_link.get_attribute("href")
                    if new_img_url is None:
                        continue

                    wd.execute_script("window.open()")
                    wd.switch_to.window(wd.window_handles[1])
                    wd.get(new_img_url)
                    # wait until the new page has loaded
                    time.sleep(sleep_between_interactions)
                    wait.until(
                        EC.presence_of_element_located((By.TAG_NAME, "img")))

                    for big_img in wd.find_elements_by_tag_name('img'):
                        if is_large_enough(big_img):
                            src = big_img.get_attribute("src")
                            print("IMG:", src)
                            image_urls.append(src)
                            break

                    wd.close()
                    wd.switch_to.window(wd.window_handles[0])
            except Exception as e:
                print(f"ERROR - {e} (continue ...)")
                close_extra_windows()

        # press the load-more button if there are not enough images yet
        if len(image_urls) >= wanted:
            print(f"Found: {len(image_urls)} image links, done!")
            break
        else:
            print("Found:", len(image_urls),
                  "image links, looking for more ...")
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script(
                    "document.querySelector('.mye4qd').click();")

        # move the result startpoint further down
        results_start = n_found_divs

    return image_urls, reference_imgs_urls
def download_chapter(driver: webdriver, chapter_url: str):
    """Save every page image of one chapter as PNG files.

    Opens the chapter, clicks the third image-quality entry in the settings
    dialog, switches the reader to horizontal mode and presses LEFT until
    the page counter shows the last or second-to-last page. Every
    ``zao-image`` element is then drawn onto a canvas in the browser and
    written to ``.//<title>//<chapter>//<page>.png``. The browser window is
    closed at the end.

    :param driver: webdriver used to read the chapter.
    :param chapter_url: address of the chapter.
    """
    counter_xpath = '/html/body/div/div[2]/div[2]/div[2]/div[2]/p'
    header_xpath = '/html/body/div/div[2]/div[2]/div[3]/div[1]/div[2]'
    to_base64_js = r'''
        function getBase64Image(img) {
            var canvas = document.createElement("canvas");
            canvas.width = img.naturalWidth;
            canvas.height = img.naturalHeight;
            var ctx = canvas.getContext("2d");
            ctx.drawImage(img, 0, 0);
            var dataURL = canvas.toDataURL();
            return dataURL.replace(/^data:image\/(png|jpg);base64,/, "");
        }
        return getBase64Image(arguments[0])
        '''

    driver.get(chapter_url)  # open chapter

    # Find the settings button (with wait) and open the settings.
    settings = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((
            By.XPATH,
            "//div[starts-with(@class, 'Navigation-module_settingsContainer_')]"
        )))
    settings.click()

    # Set the image quality.
    quality_entries = driver.find_elements_by_xpath(
        "//div[starts-with(@class, 'Modal-module_quarity_')]")
    quality_entries[2].click()

    settings.click()  # open settings again
    # Enable horizontal mode if it is disabled, then switch to it.
    for script in (
            'document.querySelector("input#mode-horizontal").removeAttribute("disabled")',
            'document.querySelector("input#mode-horizontal").click()',
    ):
        driver.execute_script(script)
    time.sleep(3)

    # Wait for the page to load after the refresh.
    title = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, header_xpath + '/a/h1'))).text
    chapter = driver.find_element_by_xpath(header_xpath + '/div/p').text[1:]
    title = title.replace(':', '')

    # Load all images by paging through the chapter.
    pages = int(
        driver.find_element_by_xpath(counter_xpath).text.split(' / ')[1])
    final_counters = (f'{pages - 1} / {pages}', f'{pages} / {pages}')
    actions = ActionChains(driver)
    reached_end = False
    while not reached_end:
        actions.send_keys(Keys.LEFT).perform()
        time.sleep(1)
        reached_end = any(
            driver.find_element_by_xpath(counter_xpath).text == counter
            for counter in final_counters)

    target_dir = f'.//{title}//{chapter}//'
    os.makedirs(os.path.dirname(target_dir), exist_ok=True)

    images = driver.find_elements_by_class_name('zao-image')
    for page, img in enumerate(images, start=1):
        encoded = driver.execute_script(to_base64_js, img)
        with open(f'{target_dir}{page}.png', 'wb') as file:
            file.write(base64.b64decode(encoded))

    driver.close()
def close_browser(driver: webdriver):
    """Close the current window, then quit the webdriver.

    :param driver: webdriver to shut down.
    """
    for shutdown_step in ("close", "quit"):
        getattr(driver, shutdown_step)()