Ejemplo n.º 1
0
def execSearch(browser: webdriver):
    """
    Run a site search on python.org and check that it yields results.

    Loads http://www.python.org, types "pycon" into the input named ``q``,
    submits it with RETURN and asserts that the resulting page source does not
    contain "No results found.".  The current browser window is closed at the
    end of a successful run.

    :param browser: webdriver
    :raises AssertionError: if "Python" is missing from the page title or the
        search reports no results.
    """

    # get request
    browser.get("http://www.python.org")

    # check that the expected word is in the title
    assert "Python" in browser.title

    # select the search input element
    elem = browser.find_element_by_name("q")
    # clear any pre-filled text, type the query and submit it
    elem.clear()
    elem.send_keys("pycon")
    elem.send_keys(Keys.RETURN)

    # were any results found?
    assert "No results found." not in browser.page_source

    # close() closes only the current window/tab; quit() would end the whole
    # browser session
    browser.close()
Ejemplo n.º 2
0
def download_data(browser_object: webdriver,
                  config_object: configparser.ConfigParser) -> BeautifulSoup:
    """Log in to the configured portal and return its parsed work-order frame.

    The ``url``, ``username`` and ``password`` options are read from the
    ``rekyl_portal`` section of ``config_object``.  After logging in, the
    option at index 10 of the ``maxhits`` dropdown is selected and submitted
    with RETURN, and the page source of the ``iframe_workorder`` frame is
    parsed with ``html.parser``.

    The browser is closed and quit before this function returns, also when a
    step fails, so a failed download does not leave a browser behind.

    :param browser_object: Selenium driver; it is closed by this call.
    :param config_object: configuration holding the ``rekyl_portal`` section.
    :return: ``BeautifulSoup`` tree built from the iframe's page source.
    """
    try:
        portal = config_object["rekyl_portal"]

        browser_object.get(portal["url"])
        time.sleep(1)

        username = browser_object.find_element_by_id("username")
        # The attribute selector was missing its closing bracket.
        password = browser_object.find_element_by_css_selector(
            "input[type=password]")

        username.send_keys(portal["username"])
        password.send_keys(portal["password"])

        browser_object.find_element_by_id("button_login_security_low").click()

        # Fixed pauses are used throughout instead of explicit waits.
        time.sleep(3)

        show_errands = Select(browser_object.find_element_by_name("maxhits"))

        show_errands.select_by_index(10)
        time.sleep(5)
        browser_object.find_element_by_name("maxhits").send_keys(Keys.RETURN)
        time.sleep(5)

        # page_source reflects the frame the driver is currently switched to.
        iframe = browser_object.find_element_by_id("iframe_workorder")
        browser_object.switch_to.frame(iframe)
        soup = BeautifulSoup(browser_object.page_source, "html.parser")
    finally:
        browser_object.close()
        browser_object.quit()

    logging.info("Raw data downloaded")

    return soup
Ejemplo n.º 3
0
def scrape_classifica(brow: webdriver) -> None:
    """
    Rebuild the 'classifica' table from the website's standings page.

    Opens ``<cfg.BASE_URL>classifica``, empties the 'classifica' table and
    inserts one record per table row whose ``data-logo`` attribute contains
    ".png".  For each row the first two and last two cells are skipped and the
    text of the remaining cells is stored.  The browser window is closed at
    the end.
    """

    brow.get(f'{cfg.BASE_URL}classifica')
    time.sleep(3)

    dbf.empty_table(table='classifica')

    header = ['team', 'G', 'V', 'N', 'P', 'Gf', 'Gs', 'Dr', 'Pt', 'Tot']
    rows = brow.find_elements_by_xpath(
        './/table/tbody/tr[contains(@data-logo, ".png")]')

    for row in rows:
        # Bring the row into view before reading its cells.
        scroll_to_element(brow, row)
        cells = row.find_elements_by_xpath('.//td')[2:-2]
        dbf.db_insert(table='classifica',
                      columns=header,
                      values=[cell.text for cell in cells])

    brow.close()
Ejemplo n.º 4
0
def driver_to_requests(driver: webdriver) -> requests.sessions.Session:
    """Return a ``requests`` session carrying the browser's cookies.

    The cookies are read first, then the driver's current window is closed.
    Only the ``name`` and ``value`` of each cookie are copied over.
    """
    browser_cookies = driver.get_cookies()
    driver.close()

    http_session = requests.Session()
    for entry in browser_cookies:
        http_session.cookies.set(entry["name"], entry["value"])
    return http_session
def get_car(driver: webdriver, i: int) -> None:
    """Open row ``i`` of the listing table and collect that car's record.

    Waits up to 10 seconds for the row's link to become clickable, clicks it,
    switches to the second window handle and builds the record there: an HTML
    description (three page fragments concatenated, newlines removed by
    ``rm_new_line``, UTF-8 encoded), the image string and fixed ``count``,
    ``activation`` and ``currency`` values.  The car's images are downloaded,
    the detail window is closed and the first window is selected again.

    NOTE(review): the signature says ``-> None`` but the success path returns
    the record dict; ``None`` results only when an exception was caught and
    printed.

    :param driver: Selenium driver showing the listing table.
    :param i: row number inserted into the ``tr[...]`` step of the XPath.
    """
    try:
        row_link = '//*[@id="tbl_list_p"]/tbody/tr[' + str(i) + ']/td[3]/a'
        link = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, row_link)))
        link.click()

        handles = driver.window_handles
        driver.switch_to.window(handles[1])

        # Description = summary table + second heading + second table.
        html = driver.find_element_by_css_selector(
            'div.search-v>table.tableV').get_attribute('outerHTML')
        for fragment in ('//*[@id="container"]/div[2]/div[3]/h2[2]',
                         '//*[@id="container"]/div[2]/div[3]/table[2]'):
            html += driver.find_element_by_xpath(fragment).get_attribute(
                'outerHTML')

        car = {
            'description': rm_new_line(html).encode("utf-8"),
            'images': get_img_str(driver),
            'count': '-1',
            'activation': '1',
            'currency': 'USD',
        }
        download_images(get_img(driver))

        driver.close()
        driver.switch_to.window(handles[0])
        return car
    except Exception as e:
        print("Can't find element. Reason: %s" % e)
Ejemplo n.º 6
0
def handle_room_tag(driver: webdriver, comm: str):
    """
    Enter every live room on the current page and post the advertisement,
    then continue with the next page until no next-page control is found.

    When no next-page element exists, "finish" is printed and the driver's
    current window is closed.

    :param driver: Selenium driver showing the room listing.
    :param comm: text handed to ``_into_room_handle`` for every room.
    """
    # Collect all room tags after running the page-end script
    driver.execute_script(Order.page_end.value)
    rooms = driver.find_elements_by_class_name(Order.room_tag.value)

    for index in range(len(rooms)):
        try:
            _into_room_handle(driver, comm, index)
        except ElementClickInterceptedException:
            # The click was intercepted: move down and retry this room once.
            move_down(driver)
            _into_room_handle(driver, comm, index)

    # The whole pagination step, including the recursive call, stays inside
    # the try so a NoSuchElementException raised deeper ends the run as well.
    try:
        move_down(driver)
        next_page = driver.find_element_by_css_selector(Order.page_down.value)
        if next_page:
            time.sleep(1)
            next_page.click()
            driver.implicitly_wait(5)
            handle_room_tag(driver, comm)

    except NoSuchElementException:
        print("finish")
        driver.close()
        return
Ejemplo n.º 7
0
def wikiloc_collect(driver: webdriver, url: str, cims_list: List[Cim]):
    """Top-level entry point: collect Wikiloc data, store it, close the browser.

    Builds a ``WikiLoc`` for ``url``, collects the data for ``cims_list`` with
    ``driver``, passes the result to ``add_to_database`` and prints the
    ``qsize()`` of what that call returns before closing the driver's window.
    """
    collected = WikiLoc(url).collect(driver, cims_list)
    stored = add_to_database(collected)
    print(stored.qsize())

    driver.close()
    print("Browser closed")
Ejemplo n.º 8
0
 def __init__(self, driver: webdriver):
     """Open the Sauce Demo login page and locate its form controls.

     Sets a 10 second implicit wait on ``driver``, loads
     https://www.saucedemo.com/ and stores the user-name field, the password
     field and the login button on the instance.

     :param driver: Selenium driver used for all lookups.
     :raises Exception: whatever the driver raised while loading the page or
         locating an element; the browser window is closed first.
     """
     self.driver = driver
     try:
         self.driver.implicitly_wait(10)  # seconds
         self.driver.get('https://www.saucedemo.com/')
         self.login_field = self.driver.find_element_by_id('user-name')
         self.password_field = self.driver.find_element_by_id('password')
         self.login_btn = self.driver.find_element_by_css_selector(
             '.btn_action')
     except Exception:
         # Still close the window, but let the failure propagate: swallowing
         # it handed callers a half-initialised object on a closed browser.
         driver.close()
         raise
Ejemplo n.º 9
0
def _into_room_handle(driver: webdriver, comm: str, current_num: int):
    """Open one room in its own window, post the comment and come back.

    :param driver: Selenium driver showing the room listing.
    :param comm: comment text forwarded to ``send_comments``.
    :param current_num: zero-based room position; the selector template is
        formatted with ``current_num + 1``.
    """
    driver.implicitly_wait(5)

    selector = Order.room_title.value.format(current_num + 1)
    room_link = driver.find_element_by_css_selector(selector)
    room_name = room_link.text
    room_link.click()

    # The room is expected under the second window handle.
    room_window = driver.window_handles[1]
    driver.switch_to.window(room_window)
    send_comments(driver, room_name, comm)
    driver.close()

    listing_window = driver.window_handles[0]
    driver.switch_to.window(listing_window)
Ejemplo n.º 10
0
def close_firefox(driver: webdriver,
                  restart: bool = False,
                  lastp: bool = False,
                  no_ads_found: bool = False,
                  time_exc: bool = False) -> None:
    """Announce why Firefox is being shut down, then close and quit it.

    The first flag that is set, in the order ``restart``, ``time_exc``,
    ``lastp``, ``no_ads_found``, decides the printed message.  For ``lastp``
    and ``no_ads_found`` the database connection is also released through
    ``disconnect_db()``.  When no flag is set nothing happens at all.
    """
    if restart:
        notice = 'Restart of Firefox requested.\n' 'Closing Firefox...\n'
        release_db = False
    elif time_exc:
        notice = ('Failed to load the listing URL in time.\n'
                  'Closing Firefox and retrying...\n')
        release_db = False
    elif lastp:
        notice = (
            'This was the last page that was set or found in the first listing URL.\n'
            'Closing Firefox...\n')
        release_db = True
    elif no_ads_found:
        notice = ('No ads found on the page. Breaking out of the loop.\n'
                  'Closing Firefox...\n')
        release_db = True
    else:
        return

    print(notice)
    driver.close()
    driver.quit()
    if release_db:
        disconnect_db()
Ejemplo n.º 11
0
def login(user: User, enel_session: EnelMedSession, driver: webdriver) -> int:
    """Log in over HTTP using the cookies the browser received.

    Loads ``LOG_IN_URL`` in the browser, takes its cookies, closes the window
    and stores the cookies as the ``cookie`` header of ``enel_session``.  The
    credentials of ``user`` are then posted to ``LOG_IN_URL`` through
    ``enel_session.session`` and the response body is printed.

    NOTE(review): the signature declares ``-> int`` but no value is returned;
    callers receive ``None``.
    """
    driver.get(LOG_IN_URL)
    browser_cookies = driver.get_cookies()
    driver.close()

    enel_session.headers['cookie'] = create_cookies_header(browser_cookies)

    credentials = dict(Login=user.username,
                       Password=user.password,
                       IsAcceptedRule="true")
    reply = enel_session.session.post(LOG_IN_URL,
                                      headers=enel_session.headers,
                                      data=credentials)
    print(reply.text)
Ejemplo n.º 12
0
def there_and_back_again(driver: webdriver, element):
    """Click ``element``, close the window it opens and return to the origin.

    Waits up to 5 seconds until exactly two windows exist, closes the first
    handle that is not the starting window, prints the remaining handles and
    switches back to the starting window.
    """
    origin = driver.current_window_handle
    element.click()

    WebDriverWait(driver, 5).until(EC.number_of_windows_to_be(2))

    opened = next(
        (handle for handle in driver.window_handles if handle != origin),
        None)
    if opened is None:
        return

    driver.switch_to.window(opened)
    driver.close()
    print('1: ', driver.window_handles)
    driver.switch_to.window(origin)
Ejemplo n.º 13
0
    def make_reservation(self, sportjaDriver: webdriver):
        """Try to book the Weightlifting class, or join its waiting list.

        Every element with class ``classname`` whose text is "Weightlifting"
        is collected and all but the first are clicked.  The cancel, reserve
        and waiting-list buttons are then looked up; a missing button is only
        reported.  The browser window is closed in every outcome.

        :param sportjaDriver: Selenium driver; also stored on the instance.
        :return: 0 when the class was reserved, the waiting list was joined or
            a cancel button shows an existing reservation; 1 otherwise.
        """
        self.sportjaDriver = sportjaDriver
        _check_if_next_week(self)

        def locate(selector, missing_message):
            """Return the element for ``selector`` or None after reporting."""
            try:
                return sportjaDriver.find_element_by_css_selector(selector)
            except NoSuchElementException:
                print(missing_message)
                return None

        weightlifting = [
            entry
            for entry in sportjaDriver.find_elements_by_class_name("classname")
            if entry.text == "Weightlifting"
        ]
        for entry in weightlifting[1:]:
            entry.click()

        sleep(1)

        cancel = locate("a.grey_btn_small:nth-child(2) > span:nth-child(1)",
                        "No cancelButton found.")
        reserve = locate("#book_btn > span:nth-child(1)",
                         "No reserveButton found.")
        waiting = locate("#join_waiting_list_btn",
                         "No waitingListButton found.")

        if reserve:
            reserve.click()
            sportjaDriver.close()
            print('Class reserved for next Saturday!')
            return 0
        if waiting:
            waiting.click()
            sportjaDriver.close()
            print('Put on waiting list, check your mailbox')
            return 0
        if cancel:
            sportjaDriver.close()
            print('Already reserved')
            return 0

        print('Can\'t reserve or put on waiting list')
        sportjaDriver.close()
        return 1
Ejemplo n.º 14
0
def gsc_work_wos_citations(browser: webdriver, work: Work) -> None:
    """Fill in the Web of Science citation fields of ``work``.

    Requests the search page for ``work['gsc_title']`` and walks its result
    entries.  The first entry whose normalised title is at least 90% similar
    to the profile title (``SequenceMatcher`` ratio) provides
    ``work['wos_citations_count']`` and ``work['wos_citations_url']``; both
    are set to ``None`` when that entry has no Web of Science link.  Every
    non-matching entry is reported through ``logging_collector``.

    Afterwards the current tab is closed and the first window handle is
    selected again.

    :param browser: Selenium driver handed to the search request.
    :param work: mapping updated in place.
    """
    soup = work_wos_citations_request(
        browser, work['gsc_title'])  # send request and get the page source

    results = []
    if soup.find('div', id='gs_res_ccl_mid'):
        results = soup.find_all('div', class_='gs_r gs_or gs_scl')
    profile_title = work['gsc_title'].lower() if results else ''

    for result in results:
        heading = result.find('h3', class_="gs_rt")
        if heading is None:
            # An entry without a title heading cannot be compared; skip it
            # instead of failing with AttributeError.
            continue
        if heading.find("a"):
            heading = heading.a

        # Collapse whitespace runs, drop bracketed markers, normalise case.
        search_title = sub(r"\s+", ' ', heading.text)
        search_title = sub(r"\[.*\]", '', search_title).strip().lower()

        similarity = SequenceMatcher(None, profile_title,
                                     search_title).ratio()
        if similarity >= 0.9:
            wos = result.find('a', class_='gs_nta gs_nph')
            if wos is not None:
                work['wos_citations_count'] = wos.string.replace(
                    'Web of Science:', '').strip()
                work['wos_citations_url'] = wos['href']
            else:
                work['wos_citations_count'] = None
                work['wos_citations_url'] = None
            print("WOS: ", work['wos_citations_count'], sep=" ")
            break

        logging_collector(
            "INFO",
            "TITLE MISMATCH",
            [
                profile_title,  # Title in profile
                search_title,  # Title in search
                similarity  # Coincidence
            ])

    sleep(0.5)
    browser.close()  # close the tab
    sleep(0.5)
    browser.switch_to.window(
        browser.window_handles[0])  # return to the main tab
Ejemplo n.º 15
0
 def click_next_page(self, driver: webdriver, fnav, snav) -> None:
     """Walk the result pages ``fnav`` .. ``snav - 1`` and process each one.

     For every page number the page's ``fnSearch`` script is executed, the
     method waits up to 5 seconds for a table row to become visible, prints
     the page number and row count and calls ``self.get_car(driver)``.  The
     browser window is closed once all pages were processed.

     Any failure is printed together with the page number reached; it is not
     re-raised and the window is then left open.

     :param driver: Selenium driver showing the search results.
     :param fnav: first page number (inclusive).
     :param snav: last page number (exclusive).
     """
     rows_xpath = '/html/body/div[1]/div[4]/div[2]/div[5]/table/tbody/tr'
     # Pre-bind so the handler can report a page even when the loop body
     # never ran (empty range followed by a failing close()).
     nav = fnav
     try:
         for nav in range(fnav, snav):
             driver.execute_script("fnSearch(" + str(nav) +
                                   ");return false; ")
             WebDriverWait(driver, 5).until(
                 EC.visibility_of_element_located((By.XPATH, rows_xpath)))
             print("Page " + str(nav))
             print("Cars len " +
                   str(len(driver.find_elements_by_xpath(rows_xpath))))
             self.get_car(driver)
         driver.close()
     except Exception as e:
         # %s prints an int exactly like %d did and cannot fail in here.
         print('Failed to Click Page Navigator %s. Reason: %s' % (nav, e))
Ejemplo n.º 16
0
def update_database(brow: webdriver, bets_to_update: list):
    """Update stored bets and predictions from the bets shown on the website.

    The bets on the page are filtered by colour and by date against
    ``bets_to_update``.  For every remaining bet with a status, its details
    window is opened and matched against the stored bets by
    ``cross_check_teams``.  On a match the quote, result and label of each
    prediction and the prize and status of the bet are written to the
    database.  The details window is closed and the main window selected
    again after every bet, matched or not.

    :param brow: Selenium driver showing the list of bets.
    :param bets_to_update: stored bets used for filtering and matching.
    """

    bets_list = filter_by_color(brow)

    bets_list = filter_by_date(web_bets=bets_list, db_bets=bets_to_update)

    for bet in bets_list:

        status = get_bet_status(bet=bet)
        if not status:
            continue

        main_window = brow.current_window_handle
        # Only the driver returned by open_details is needed from here on.
        _, brow = open_details(brow=brow, bet=bet)

        path = './/table[@class="bet-detail"]'
        wait_visible(brow, path)
        details = brow.find_element_by_xpath(path)

        bet_id, preds = cross_check_teams(table=details,
                                          bets_db=bets_to_update)
        if bet_id:
            # NOTE(review): team names are interpolated into the WHERE clause
            # unescaped; a name containing a double quote breaks the
            # statement. Confirm whether dbf.db_update accepts parameters.
            for tm1, tm2, quote, result, label in preds:
                dbf.db_update(table='predictions',
                              columns=['quote', 'result', 'label'],
                              values=[quote, result, label],
                              where=(f'bet_id = {bet_id} AND ' +
                                     f'team1 = "{tm1}" AND team2 = "{tm2}"'))

            prize = get_prize(brow=brow)
            dbf.db_update(table='bets',
                          columns=['prize', 'result'],
                          values=[prize, status],
                          where=f'id = {bet_id}')

        # switch_to.window replaces the legacy switch_to_window spelling.
        brow.close()
        brow.switch_to.window(main_window)
Ejemplo n.º 17
0
 def get_car_data(self, driver: webdriver, i: int) -> None:
     """Scrape one auction lot from its popup window and write it to the CSV.

     Clicks the link in row ``i`` of the listing table, switches to the second
     window handle and collects the record there: translated title, price,
     an HTML description assembled from four page sections, the image string
     and the vehicle attributes packed into the ``properties`` string.  The
     popup is then closed, the first window is selected again and the record
     is handed to ``self.write_csv``.

     Every lookup is best-effort: a failure is printed, the page is refreshed
     and a default (empty string or 0; "Дизель" for the fuel) is used, so a
     single missing field does not abort the whole record.

     :param driver: Selenium driver showing the listing table.
     :param i: row number inserted into the ``tr[...]`` step of the XPath.
     :return: ``False`` when ``self.alert_present(driver)`` reports an alert
         after the click, otherwise ``None``.  NOTE(review): the annotation
         says ``-> None`` only.
     """
     # Frame of this call; its current line number is printed when a
     # description section fails, pointing at the failing step.
     frame = inspect.currentframe()
     car = {}

     def append_section(label, header, selector, chunked=False):
         """Append ``header`` and the translated outerHTML of ``selector``."""
         car['description'] += header
         try:
             element = WebDriverWait(driver, 5).until(
                 EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
             html = element.get_attribute("outerHTML")
             if chunked and len(html) > 5000:
                 # Translate long markup in consecutive 4999-character
                 # slices; the slice end used to be ``i + 1 * 4999``, which
                 # dropped almost everything after the first slice.
                 pieces = [
                     html[start:start + 4999]
                     for start in range(0, len(html), 4999)
                 ]
             else:
                 pieces = [html]
             for piece in pieces:
                 car['description'] += self.rm_new_line(
                     self.ko_translate(piece, "ru"))
         except Exception as e:
             print(frame.f_lineno)
             print('Can\'t get %s. Reason %s' % (label, e))
             driver.refresh()

     def read_field(label, default, selector, convert):
         """Return ``convert(text of selector)``, or ``default`` on failure."""
         try:
             return convert(
                 driver.find_element_by_css_selector(selector).text)
         except Exception as e:
             print('Can\'t get car %s. Reason %s' % (label, e))
             driver.refresh()
             return default

     def digits(text):
         """Join every digit run found in ``text`` into one integer."""
         return int(''.join(re.findall(r"\d+", text)))

     try:
         WebDriverWait(driver, 5).until(
             EC.element_to_be_clickable(
                 (By.XPATH,
                  '/html/body/div[1]/div[4]/div[2]/div[5]/table/tbody/tr[' +
                  str(i) + ']/td[5]/p/a[@class="a_list"]'))).click()
     except Exception as e:
         print('Can\'t click on car %d. Reason %s' % (i, e))
         driver.refresh()
     if self.alert_present(driver):
         return False
     tabs = driver.window_handles
     try:
         driver.switch_to.window(tabs[1])
     except Exception as e:
         print('Can\'t switch to new window. Reason %s' % e)
     self.download_images(self.get_img(driver))

     car['category'] = self.category
     car['category_url'] = self.category_url
     try:
         # Title and SEO title come from the same heading: translate once.
         title = self.ko_translate(
             driver.find_element_by_xpath(
                 '/html/body/div[1]/div[1]/h2').text, "en")
         car['title'] = title
         car['title-seo'] = title
     except Exception:
         car['title'] = ''
         car['title-seo'] = ''
         driver.refresh()
     try:
         car['price'] = int(
             driver.find_element_by_xpath(
                 '/html/body/div[1]/div[2]/div[2]/p/strong/em').text.
             replace(',', '')) * 9.10
     except Exception:
         car['price'] = ''
         driver.refresh()

     car['description'] = ''
     append_section(
         'protocol view',
         '<div class="paper-tit"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Протокол осмотра / осмотра автомобилей, выставленных на аукцион</font></font></div>',
         'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div > div.vehicle-detail > div > div > div > table',
         chunked=True)
     append_section(
         'car details',
         '<h2 class="page-subtit mt60"><font style="vertical-align: inherit"><font style="vertical-align: inherit">Детали автомобиля</font></font></h2>',
         'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div.vehicle-detail-view > div.vehicle-detail > div.vehicle-detail_bar > table.tbl-v02'
     )
     append_section(
         'car condition',
         '<h2 class="page-subtit mt60" id="view-status"><font style="vertical-align: inherit"><font style="vertical-align: inherit">Состояние кузова автомобиля</font></font></h2>',
         'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div.vehicle-detail-view > div.tab-status'
     )
     append_section(
         'car control list',
         '<h2 class="page-subtit mt60"><font style="vertical-align: inherit;"><font style="vertical-align: inherit;">Видео автомобиля</font></font></h2>',
         'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div.vehicle-detail-view > div:nth-child(5)'
     )

     # Two attribute tables are read: one nested three <div>s deep (year and
     # mark) and one nested one <div> deep (all other attributes).
     inspection_cell = 'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div > div.vehicle-detail > div > div > div > table > tbody > '
     spec_cell = 'body > div.page-popup.exhibited-vehicle > div.vehicle-detail > div > div.vehicle-detail > div > table > tbody > '
     fuel_names = {
         "가솔린": "Бензин",
         "디젤": "Дизель",
         "LPG": "LPG",
         "LPI하이브리드": "LPG гибрид",
         "가솔린하이브리드": "Бензиновый гибрид",
         "디젤하이브리드": "Дизельный гибрид",
         "전기": "Электрокар",
         "가솔린/LPG": "Бензин/LPG"
     }

     year = read_field(
         'year', 0,
         inspection_cell + 'tr:nth-child(5) > td:nth-child(4)', int)
     mark = read_field(
         'mark', '',
         inspection_cell + 'tr:nth-child(4) > td:nth-child(5)',
         lambda text: self.ko_translate(text, "en"))
     color = read_field(
         'color', '', spec_cell + 'tr:nth-child(3) > td:nth-child(4)',
         lambda text: self.ko_translate(text, "ru"))
     fuel = read_field(
         'fuel', "Дизель", spec_cell + 'tr:nth-child(4) > td:nth-child(4)',
         lambda text: fuel_names[text])
     displacement = read_field(
         'displacement', 0,
         spec_cell + 'tr:nth-child(5) > td:nth-child(4)', digits)
     # The conditional used to be written the wrong way round and always
     # produced a bool instead of one of the two labels.
     transmission = read_field(
         'transmission', '',
         spec_cell + 'tr:nth-child(2) > td:nth-child(4)',
         lambda text: "Автомат" if text == "자동" else "Механика")
     car_type = read_field(
         'type', '', spec_cell + 'tr:nth-child(6) > td:nth-child(2)',
         lambda text: self.ko_translate(text, "ru"))
     lot_number = read_field(
         'lot number', '',
         'body > div.page-popup.exhibited-vehicle > div.vehicle-tit > p > strong',
         lambda text: self.ko_translate(text, "ru"))
     distance_driven = read_field(
         'distance driven', 0,
         spec_cell + 'tr:nth-child(1) > td:nth-child(4)', digits)

     car['images'] = self.get_img_str(driver)
     car['count'] = '0'
     car['activation'] = '1'
     car['currency'] = 'USD'
     car['recomended'] = '0'
     car['new'] = '0'
     car['weight'] = '0'
     car['article'] = lot_number
     car['properties'] = (
         'Цвет=[type=assortmentCheckBox value=%s product_margin=Желтый|Белый|Серебро|Красный|Фиолетовый|Оранжевый|Зеленый|Серый|Золото|Коричневый|Голубой|Черный|Бежевый]&Кузов=[type=assortmentCheckBox value=%s product_margin=Универсал|Фургон|Фура|Трактор|Седан|Родстер|Пикап|Мотоцикл|Минивен|Хэтчбек|Кроссовер|Купе|Кабриолет|Багги]&Пробег=%d&Двигатель=%d&Год=%d&Трансмиссия=[type=assortmentCheckBox value=%s product_margin=Механика|Автомат]&Топливо=[type=assortmentCheckBox value=%s product_margin=Дизель|Бензин|Газ]&Модель=%s&Марка=%s&Номер лота=%s&Аукцион=lotteautoauction'
         % (color, car_type, distance_driven, displacement, year,
            transmission, fuel, mark, self.category, lot_number))
     try:
         driver.close()
         driver.switch_to.window(tabs[0])
     except Exception as e:
         print('Can\'t switch to old window. Reason %s' % e)
     self.write_csv(car)
Ejemplo n.º 18
0
 def closeBrowser(wedriver: webdriver):
     """Call ``close()`` on the given driver.

     NOTE(review): the definition is indented like a method but takes no
     ``self``; if it lives in a class without ``@staticmethod`` the instance
     itself would be bound to ``wedriver`` — confirm against the enclosing
     scope.
     """
     wedriver.close()
Ejemplo n.º 19
0
    def fetch_image_urls(self,
                         query: str,
                         wd: webdriver,
                         sleep_between_interactions: int = 1,
                         max_timeout=5,
                         imgs_offset=5):
        """Collect image URLs for ``query`` from a Google image search.

        The result page is scrolled and its thumbnail containers are walked.
        For every link in a container the ``href`` is recorded as a reference
        URL, thumbnails of at least 60x60 are clicked, and the link's target
        is opened in a second window where the ``src`` of the first image of
        at least 60x60 is taken.  This repeats, pressing the "load more"
        control when present, until ``self.imgs2download + imgs_offset`` URLs
        were collected or the page offers nothing new.

        :param query: search terms inserted into the search URL.
        :param wd: Selenium driver to use.
        :param sleep_between_interactions: pause in seconds after scrolling,
            clicking and loading a page.
        :param max_timeout: seconds to wait for expected elements.
        :param imgs_offset: number of URLs to collect beyond
            ``self.imgs2download``.
        :return: tuple ``(image_urls, reference_imgs_urls)``.
        """
        min_side = 60  # images with a smaller width or height are ignored

        def scroll_to_end(wd):
            wd.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(sleep_between_interactions)

        def is_large_enough(img):
            """Tell whether both width and height reach ``min_side``."""
            # The height used to be read from the "width" attribute as well.
            width = int(img.get_attribute("width"))
            height = int(img.get_attribute("height"))
            return width >= min_side and height >= min_side

        def close_extra_windows():
            """Close every window except the first and focus the first."""
            handles = wd.window_handles
            for handle in handles[1:]:
                # close() acts on the focused window, so focus each one first.
                wd.switch_to.window(handle)
                wd.close()
            wd.switch_to.window(handles[0])

        # build the google query
        search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

        # load the page
        wd.get(search_url.format(q=query))

        image_urls = []
        reference_imgs_urls = []
        wait = WebDriverWait(wd, max_timeout)
        wanted = self.imgs2download + imgs_offset
        results_start = 0
        while len(image_urls) < wanted:
            scroll_to_end(wd)
            # locate the container holding all thumbnail results
            wait.until(EC.presence_of_element_located((By.ID, "islrg")))
            thumbnail_div = wd.find_element_by_id('islrg')
            WebDriverWait(thumbnail_div, max_timeout).until(
                EC.presence_of_element_located((By.CLASS_NAME, "islrc")))
            thumbnail_div = thumbnail_div.find_elements_by_class_name(
                'islrc')[0]
            WebDriverWait(thumbnail_div, max_timeout).until(
                EC.presence_of_element_located((By.TAG_NAME, "div")))
            div_with_link_img = thumbnail_div.find_elements_by_tag_name('div')
            n_found_divs = len(div_with_link_img)

            # only look at the containers that appeared since the last round
            for div_of_img in div_with_link_img[results_start:]:
                if len(image_urls) >= wanted:
                    break
                try:
                    wd.switch_to.window(wd.window_handles[0])
                    for pos_link in div_of_img.find_elements_by_tag_name('a'):
                        possible_imgs = pos_link.find_elements_by_tag_name(
                            'img')
                        p_links = pos_link.get_attribute("href")
                        print("PL: ", p_links)
                        if p_links is not None:
                            reference_imgs_urls.append(p_links)

                        # Click the thumbnails so that the link appears. The
                        # list is no longer modified while being iterated,
                        # which used to skip the image after a removed one.
                        clicked_any = False
                        for img in possible_imgs:
                            if not is_large_enough(img):
                                continue
                            img.click()
                            # wait until the click has had its effect
                            time.sleep(sleep_between_interactions)
                            clicked_any = True
                        if not clicked_any:
                            continue

                        # re-read the href: it may only exist after the click
                        new_img_url = pos_link.get_attribute("href")
                        if new_img_url is None:
                            continue

                        wd.execute_script("window.open()")
                        wd.switch_to.window(wd.window_handles[1])
                        wd.get(new_img_url)
                        # wait until the new page has loaded
                        time.sleep(sleep_between_interactions)
                        wait.until(
                            EC.presence_of_element_located(
                                (By.TAG_NAME, "img")))
                        for big_img in wd.find_elements_by_tag_name('img'):
                            if is_large_enough(big_img):
                                src = big_img.get_attribute("src")
                                print("IMG:", src)
                                image_urls.append(src)
                                break
                        wd.close()
                        wd.switch_to.window(wd.window_handles[0])
                except Exception as e:
                    print(f"ERROR - {e} (continue ...)")
                    close_extra_windows()

            # load more results if there are not enough images yet
            if len(image_urls) >= wanted:
                print(f"Found: {len(image_urls)} image links, done!")
                break
            print("Found:", len(image_urls),
                  "image links, looking for more ...")
            # find_elements returns an empty list when the control is absent,
            # whereas find_element raises and made this check meaningless.
            if wd.find_elements_by_css_selector(".mye4qd"):
                wd.execute_script(
                    "document.querySelector('.mye4qd').click();")
            elif n_found_divs <= results_start:
                # no control to press and nothing new appeared: stop here
                # rather than scrolling forever
                break
            # move the result startpoint further down
            results_start = n_found_divs

        return image_urls, reference_imgs_urls
Ejemplo n.º 20
0
def download_chapter(driver: webdriver, chapter_url: str):
    """Save every page image of the chapter at ``chapter_url`` as a PNG file.

    Opens the chapter, picks the third entry of the quality options, enables
    and selects the horizontal reading mode, then presses LEFT once per
    second until the page counter shows the last or second-to-last page.
    Each ``zao-image`` element is drawn onto a canvas in the browser and the
    resulting data is written to ``./<title>/<chapter>/<n>.png``.  The
    browser window is closed at the end.

    :param driver: Selenium driver to use.
    :param chapter_url: address of the chapter viewer page.
    """
    counter_xpath = '/html/body/div/div[2]/div[2]/div[2]/div[2]/p'

    driver.get(chapter_url)  # Open chapter

    settings = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((
            By.XPATH,
            "//div[starts-with(@class, 'Navigation-module_settingsContainer_')]"
        )))  # Find settings button with wait
    settings.click()  # Open settings
    driver.find_elements_by_xpath(
        "//div[starts-with(@class, 'Modal-module_quarity_')]")[2].click(
        )  # Set image quality
    settings.click()  # Open settings again
    driver.execute_script(
        'document.querySelector("input#mode-horizontal").removeAttribute("disabled")'
    )  # Enable horizontal mode if it is disabled
    driver.execute_script(
        'document.querySelector("input#mode-horizontal").click()'
    )  # Turn into horizontal mode
    time.sleep(3)

    title = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, '/html/body/div/div[2]/div[2]/div[3]/div[1]/div[2]/a/h1'
             ))).text  # Wait for page load after refresh
    # The first character of the chapter label is dropped.
    chapter = driver.find_element_by_xpath(
        '/html/body/div/div[2]/div[2]/div[3]/div[1]/div[2]/div/p').text[1:]

    title = title.replace(':', '')

    # Load all images by paging through the chapter. The counter text has
    # the form "<current> / <total>".
    pages = int(
        driver.find_element_by_xpath(counter_xpath).text.split(' / ')[1])
    final_positions = (f'{pages - 1} / {pages}', f'{pages} / {pages}')
    actions = ActionChains(driver)
    while True:
        actions.send_keys(Keys.LEFT).perform()
        time.sleep(1)
        if driver.find_element_by_xpath(counter_xpath).text in final_positions:
            break

    chapter_dir = os.path.join('.', title, chapter)
    os.makedirs(chapter_dir, exist_ok=True)
    # Raw string: the JavaScript regex contains "\/", which is not a valid
    # Python escape sequence in a normal string literal.
    to_base64 = r'''function getBase64Image(img)
            {
                var canvas = document.createElement("canvas");
                canvas.width = img.naturalWidth;
                canvas.height = img.naturalHeight;
                var ctx = canvas.getContext("2d");
                ctx.drawImage(img, 0, 0);
                var dataURL = canvas.toDataURL();
                return dataURL.replace(/^data:image\/(png|jpg);base64,/, "");
            }
            return getBase64Image(arguments[0])
            '''
    for page, img in enumerate(driver.find_elements_by_class_name('zao-image'),
                               start=1):
        image_data = base64.b64decode(driver.execute_script(to_base64, img))
        with open(os.path.join(chapter_dir, f'{page}.png'), 'wb') as file:
            file.write(image_data)

    driver.close()
Ejemplo n.º 21
0
def close_browser(driver: webdriver):
    """Close the driver's current window, then quit the driver.

    ``close()`` is called first and ``quit()`` second, in that order.
    """
    driver.close()
    driver.quit()