Beispiel #1
0
def login_website(chrome_webdriver: WebDriver, email: str,
                  password: str) -> None:
    """
    Sign in to Brainscape with the given credentials.

    Opens the login form, types ``email`` and ``password`` into the fields
    with those ids, presses the "Log In" button and then pauses for three
    seconds.
    """
    log_in_button_xpath = (
        "//span[contains(@class, 'label') and text() = 'Log In']")

    # The form fields are looked up only after the login link was clicked.
    chrome_webdriver.find_element_by_class_name("login-link").click()

    for field_id, text in (("email", email), ("password", password)):
        chrome_webdriver.find_element_by_id(field_id).send_keys(text)

    chrome_webdriver.find_element_by_xpath(log_in_button_xpath).click()
    sleep(3)
Beispiel #2
0
def info_take_from(brower: WebDriver, tag: str) -> List[str]:
    """
    Collect the text of every "nearby" result page for a search tag.

    Clicks the first search result, opens its nearby-search box, submits
    ``tag`` and then walks the result pager, recording the text of the
    ``poilist`` element on each page. The browser window is closed before
    returning.

    :param brower: browser session; presumably already showing a list of
        search results (the first entry of ``card-1`` is clicked)
    :param tag: nearby category to search for, e.g. food or lodging
    :return: one string per result page; an empty list when the only page
        collected does not contain the distance marker '米'
    """
    # Click the first search result, then "nearby", then submit the tag.
    brower.find_element_by_xpath('//*[@id="card-1"]/div/ul/li[1]').click()
    sleep(1)
    brower.find_element_by_xpath(
        '//*[@id="generalinfo"]/div[1]/div[1]').click()
    sleep(1)
    around_input = brower.find_element_by_xpath('//*[@id="nearby-input"]')
    around_input.send_keys(tag)
    around_input.send_keys(Keys.ENTER)
    sleep(2)

    # Read the first page, then keep paging while the newest page still
    # contains the '米' marker.
    info = [brower.find_element_by_class_name('poilist').text]
    while '米' in info[-1]:
        try:
            page_location = brower.find_element_by_xpath(
                '//*[@id="poi_page"]/p')
            # The pager layout varies, so the last <span> is taken as the
            # "next page" button. An empty pager makes [-1] raise
            # IndexError, which is treated like a missing pager.
            next_page = page_location.find_elements_by_tag_name('span')[-1]
            # The button counts as clickable only if its <a> has an onclick.
            next_page_status = next_page.find_element_by_tag_name(
                'a').get_attribute("onclick")
            if next_page_status is None:
                break
            next_page.click()
            sleep(2)
            info.append(brower.find_element_by_class_name('poilist').text)
        except (NoSuchElementException, IndexError):
            break

    brower.close()
    if len(info) == 1 and '米' not in info[0]:
        return []
    return info
Beispiel #3
0
def GetEndpoints(
        driver: WebDriver,
        trace: bool = False) -> "List[Tuple[str, str, str, str, str]]":
    """Collect the endpoints listed on the TD Ameritrade developer site.

    Loads the API index page, maps each category name to its link, then
    visits every category (sorted by name) and records one entry per
    ``views-row`` element.

    Args:
        driver: browser session used for all page loads.
        trace: when true, pretty-print the category map and endpoint list.

    Returns:
        A list of ``(category, function_name, method, url, link)`` tuples.
        The annotation is a string so that no extra typing names need to
        be imported.

    Raises:
        ValueError: if a row's text has fewer than three lines.
    """
    driver.get("https://developer.tdameritrade.com/apis")
    elem = driver.find_element_by_class_name('view-smartdocs-models')
    categories = {}
    for row in elem.find_elements_by_class_name('views-row'):
        # The first text line of a row is used as the category name.
        category = CleanName(row.text.splitlines()[0])
        link = row.find_element_by_tag_name('a').get_attribute('href')
        categories[category] = link
    if trace:
        pprint.pprint(categories)

    # Process each of the categories.
    endpoints = []
    for catname, catlink in sorted(categories.items()):
        logging.info("Getting %s", catlink)
        driver.get(catlink)
        for row in driver.find_elements_by_class_name('views-row'):
            link = row.find_element_by_tag_name('a').get_attribute('href')
            # The first three text lines are method, function name and URL.
            method, funcname, url = row.text.splitlines()[:3]
            funcname = CleanName(funcname.strip())
            endpoints.append((catname, funcname, method, url, link))
    if trace:
        pprint.pprint(endpoints)

    return endpoints
Beispiel #4
0
def get_children_decks(chrome_webdriver: WebDriver,
                       child: WebElement) -> List[WebElement]:
    """
    Return every deck row inside the page's "deck-list" element.

    The lookup starts from the driver; the ``child`` argument is accepted
    but never read.
    """
    return chrome_webdriver.find_element_by_class_name(
        "deck-list").find_elements_by_class_name("dashboard-deck-row")
Beispiel #5
0
def GetErrorCodes(driver: WebDriver) -> Dict[int, str]:
    """Build a mapping of numeric error code to message text.

    Reads the ``listErrorCodes`` rows of the ``table-error-codes`` element;
    each row must contain exactly two ``<td>`` cells (code, message),
    otherwise the unpacking raises ValueError.
    """
    table = driver.find_element_by_class_name('table-error-codes')
    code_to_message = {}
    for row in table.find_elements_by_class_name('listErrorCodes'):
        cells = row.find_elements_by_tag_name('td')
        code_text, message_text = [cell.text for cell in cells]
        code_to_message[int(code_text)] = message_text
    return code_to_message
Beispiel #6
0
    def __init__(self, driver: WebDriver):
        """Capture the story's title, article HTML and answer links.

        All three are read from the page the driver is showing at
        construction time; the driver itself is kept on ``self.driver``.
        """
        self.driver = driver

        title_el = driver.find_element_by_class_name('at-story__title')
        self.title = title_el.text

        # The article is stored as raw inner HTML, not as rendered text.
        article_el = driver.find_element_by_css_selector(
            '.at-story__article article')
        self.article = article_el.get_attribute('innerHTML')

        self.answer_els = driver.find_elements_by_css_selector(
            '.at-story__answers ul li a')
Beispiel #7
0
def find_parent_decks(chrome_webdriver: WebDriver,
                      parent_deck_class: str) -> List[WebElement]:
    """
    Return the ``<li>`` elements inside the "user-packs" element.

    The class name searched for is fixed; ``parent_deck_class`` is accepted
    but never read.
    """
    packs_container = chrome_webdriver.find_element_by_class_name(
        "user-packs")
    return packs_container.find_elements_by_tag_name("li")
Beispiel #8
0
def get_file_name_for_csv_files(chrome_webdriver: WebDriver) -> str:
    """
    Derive a file name from the text of the "new-modal-title" element.

    Only characters from the allow-list below are kept (ASCII letters,
    digits, a few punctuation marks, the listed accented letters and the
    space); every other character is dropped.
    """
    title_text = chrome_webdriver.find_element_by_class_name(
        "new-modal-title").text
    allowed = f"-_.(){ascii_letters}{digits}ÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑáàâãéèêíïóôõöúçñ "
    kept = [symbol for symbol in title_text if symbol in allowed]
    return "".join(kept)
Beispiel #9
0
def check_dialog(driver: WebDriver, name, timeout=30):
    """Dismiss the TrustArc cookie-consent overlay if it is on the page.

    ``name`` only prefixes the progress messages. ``timeout`` is the number
    of seconds to wait for the consent iframe. Any WebDriverException
    (including "no overlay present") is reported on stdout after switching
    back to the top-level document; nothing is raised for it.
    """
    consent_frame_css = "[title='TrustArc Cookie Consent Manager']"
    try:
        # Raises when there is no overlay, which jumps to the handler below.
        driver.find_element_by_class_name("truste_overlay")
        print(f"{name}: show trust_overlay")

        # The consent UI lives inside an iframe: wait for it, then enter it.
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, consent_frame_css)))
        driver.switch_to.frame(
            driver.find_element_by_css_selector(consent_frame_css))

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "pdynamicbutton")))
        button_box = driver.find_element_by_class_name("pdynamicbutton")
        button_box.find_element_by_class_name("call").click()
        print(f"{name}: click truste_overlay")
        sleep(5)
        driver.switch_to.default_content()
    except WebDriverException as err:
        # Leave the iframe (if entered) before reporting the problem.
        driver.switch_to.default_content()
        print(f'{name}: {err.msg}')
Beispiel #10
0
def find_more_butto_and_click(driver: WebDriver):
    """Click the first "continue reading" button found on the page.

    Tries each class name in ``MORE_BUTTON_CLASS`` in order. The first
    element found is clicked through JavaScript, followed by a two second
    pause. Prints a message either way.
    """
    for more_btn_class in MORE_BUTTON_CLASS:
        try:
            more_button = driver.find_element_by_class_name(more_btn_class)
        except Exception:
            # Best effort: this class is not on the page, try the next one.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.)
            continue
        if more_button is not None:
            print("点击[继续阅读]按钮,加载所有子页面")
            driver.execute_script("arguments[0].click();", more_button)
            time.sleep(2)
            return
    print("没有找到[继续阅读]按钮")
Beispiel #11
0
class SeleniumModule():
    """Small helper bundling a WebDriver session with a plain HTTP fetch."""

    def __init__(self, executable_path='Chromeドライバのパス'):
        """Start the browser.

        :param executable_path: location of the Chrome driver binary; the
            default is the original placeholder text and must be replaced
            with a real path.
        """
        self.selenium = WebDriver(executable_path=executable_path)

    def quit(self):
        """End the browser session."""
        self.selenium.quit()

    def login(self, login_url, username='username', password='password'):
        """Open ``login_url``, fill in the credentials and submit the form.

        :param login_url: address of the login page
        :param username: text typed into the field named "username"
        :param password: text typed into the field named "password"

        The defaults are the placeholder values that used to be hard-coded.
        """
        self.selenium.get(login_url)
        self.selenium.find_element_by_name("username").send_keys(username)
        self.selenium.find_element_by_name("password").send_keys(password)
        self.selenium.find_element_by_class_name('btn').click()

    def get_page_data(self, url, tag, attribute):
        """Fetch ``url`` with requests and return the matching tags.

        This does not go through the browser session, so cookies from
        :meth:`login` are not sent.

        :param tag: tag name to search for, e.g. ``'a'``
        :param attribute: attribute filter, e.g. ``{'class': 'r'}``
        :return: result of ``soup.find_all(tag, attribute)``
        """
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')
        return soup.find_all(tag, attribute)
Beispiel #12
0
 def _scrape_price_by_book_details(self, page_url, driver: WebDriver):
     """Read a book's price from its details page in a temporary tab.

     Returns the text of the second "book-price" element inside
     "section-1" > "container". On any error the tab is closed and the
     string "0" is returned.
     """
     try:
         open_new_tab(driver)
         driver.get(page_url)
         price_cells = driver.find_element_by_class_name(
             "section-1").find_element_by_class_name(
                 "container").find_elements_by_class_name("book-price")
         # Index 1: the second price element is the one that is wanted.
         price_text = price_cells[1].text
         close_current_tab(driver)
     except Exception:
         close_current_tab(driver)
         return "0"
     return price_text
Beispiel #13
0
def get_cards_info_of_deck(chrome_webdriver: WebDriver, deck: WebElement):
    """
    Export the cards of one deck to a CSV file.

    Opens the deck's preview (glasses icon), reads every "preview-card",
    and writes one row per card (front, back) into
    ``<cwd>/csv_sem_utf8_delimiter_virgula/<title>.csv``. The file name is
    the modal title without the word "Preview". A missing page element at
    any point ends the function silently.
    """

    try:
        deck.find_element_by_class_name("ion-ios-glasses-outline").click()
        sleep(2)
        preview_table = chrome_webdriver.find_element_by_class_name(
            "preview-card-table")
        cards = preview_table.find_elements_by_class_name("preview-card")
        modal_title = get_file_name_for_csv_files(chrome_webdriver)
        csv_file_name = modal_title.replace("Preview", "").strip()

        path = os.getcwd()
        path_csv = f"{path}/csv_sem_utf8_delimiter_virgula/"
        try:
            os.mkdir(path_csv)
        except OSError:
            # Usually "already exists"; any other failure shows up at open().
            pass

        with open(f"{path_csv}{csv_file_name}.csv", "w",
                  newline="") as csv_file:
            writer = csv.writer(csv_file, delimiter=",")
            # Rows are produced lazily, so cards are read and written one
            # at a time, front before back.
            writer.writerows(
                [get_card_text_img_audio_data(card, "front"),
                 get_card_text_img_audio_data(card, "back")]
                for card in cards)

        # Announce the saved file in a randomly picked colour.
        color_options = [
            Fore.GREEN, Fore.MAGENTA, Fore.CYAN, Fore.BLUE, Fore.YELLOW
        ]
        print(choice(color_options) + f"{csv_file_name}.csv salvo!")

        chrome_webdriver.find_element_by_class_name("close-button").click()
    except NoSuchElementException:
        pass
0
def check_another_connection(driver: WebDriver, name) -> bool:
    """Report whether the session was moved to another browser tab.

    Looks at the notification banner in the top-level document. Returns
    True (and prints a message prefixed with ``name``) when the banner
    shows the "session was transferred" text, False otherwise. On any
    WebDriverException the driver is switched into frame "iframetab1" and
    False is returned.
    """
    transferred_text = 'Your session was transferred to another browser tab.'
    try:
        driver.switch_to.default_content()
        banner = driver.find_element_by_class_name('notification-wrapper')
        message = banner.find_element_by_tag_name('span').text
        if message != transferred_text:
            return False
        print(f"{name}: run on another window")
        return True
    except WebDriverException:
        driver.switch_to.frame("iframetab1")
        return False
Beispiel #15
0
def select(driver: WebDriver, first_menu_index: int, second_menu_index: int, third_menu_index: int):
    """Walk three menu levels by index, then enter the iframe that appears.

    Each index picks an entry from the list of menu items found at that
    level. After the third click the driver is switched into the first
    visible ``<iframe>``.
    """
    # Level 1: top menu entry (the "CRM system" entry in the original flow).
    top_menu = driver.find_element_by_class_name("menu___1QsMw")
    top_items = top_menu.find_elements_by_class_name("menu-item___3QMia")
    top_items[first_menu_index].click()

    # Level 2: sidebar sub-menu (the "personal workbench" entry).
    sidebar = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CLASS_NAME, "sider___1t24v")))
    sidebar.find_elements_by_class_name("ant-menu-submenu")[second_menu_index].click()

    # Level 3: inline menu item (the "my public pool" entry).
    inline_menu = WebDriverWait(driver, 5).until(
        EC.visibility_of_element_located((By.XPATH, '//ul[contains(@id,"$Menu")]')))
    inline_menu.find_elements_by_class_name("ant-menu-item")[third_menu_index].click()

    # The selected page renders inside an iframe; switch into it.
    content_frame = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.TAG_NAME, "iframe")))
    driver.switch_to.frame(content_frame)
class Application:
    """Test fixture that owns the WebDriver and the helper/page objects."""

    def __init__(self):
        """Start a browser with a 5 second implicit wait and build helpers."""
        self.wd = WebDriver()
        self.wd.implicitly_wait(5)
        self.session = SessionHelper(self)
        self.payment = PaymentHelper(self)
        self.localization = LocalizationHelper(self)
        self.navigation = NavigationHelper(self)
        self.login_page = LoginPage(self)
        self.currency = Currency(self)
        self.payment_systems = PaymentSystems(self)

    def experiment(self, a):
        # NOTE(review): scratch code, left untouched. As written it cannot
        # run: ``someText`` is not defined in this method, and two finder
        # calls are made without a selector. ``By.cssSelector`` looks like
        # the Java spelling of ``By.CSS_SELECTOR`` -- confirm the intent
        # before fixing or removing.
        self.wd.find_element(By.cssSelector('span[class=\"title\"]:contains(' + someText + ')'))
        self.wd.find_element_by_xpath()
        self.wd.find_element_by_class_name('forgot')
        self.wd.find_element_by_link_text('Forgot password')
        self.wd.find_element_by_css_selector()

    def check_exists_by_css_selector(self, selector):
        """Return True if at least one element matches the CSS ``selector``."""
        try:
            # Singular finder on purpose: the plural ``find_elements_*``
            # returns an empty list instead of raising, which made this
            # check return True for every selector.
            self.wd.find_element_by_css_selector(selector)
        except NoSuchElementException:
            return False
        return True

    def check_exists_by_xpath(self, xpath):
        """Return True if at least one element matches ``xpath``."""
        try:
            self.wd.find_element_by_xpath(xpath)
        except NoSuchElementException:
            return False
        return True

    def destroy(self):
        """Quit the browser session."""
        self.wd.quit()
Beispiel #17
0
 def _scrape_pages_count(self, url, driver: WebDriver):
     """Return the last number shown in the page's "pagination" element.

     Loads ``url``, transliterates the pagination text with ``unidecode``
     and takes the final all-digit token as the page count. Any failure
     (no pagination element, no numeric token, ...) yields 1.
     """
     fallback_page_count = 1
     try:
         driver.get(url)
         pagination_text = unidecode(
             driver.find_element_by_class_name("pagination").text)
         page_numbers = [
             int(token) for token in pagination_text.split()
             if token.isdigit()
         ]
         return page_numbers[-1]
     except Exception:
         return fallback_page_count
Beispiel #18
0
class WebDriverContainer:
    """Context manager around a Chrome WebDriver with waiting lookups.

    Each ``find_element_by_*`` method waits up to
    ``SELENIUM_TIME_TO_WAIT_IN_SECONDS`` for the element to be present.
    """

    def __init__(self, headless: bool = False) -> None:
        """Make sure the driver is installed and start the browser.

        :param headless: run Chrome without a visible window when True
        """
        ensure_driver_installed()
        print("Opening web browser...")
        options = Options()
        options.headless = headless
        self.driver = WebDriver(CHROME_DRIVER_INSTALL_PATH, options=options)
        self.driver.implicitly_wait(SELENIUM_TIME_TO_WAIT_IN_SECONDS)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("Closing web browser...")
        # quit() ends the whole session. close() only closed the current
        # window and left the session open.
        self.driver.quit()

    def get(self, url: str, print_log: bool = True):
        """Navigate to ``url``, optionally announcing it on stdout."""
        if print_log:
            print(f"Retrieve url [{url}]...")
        self.driver.get(url)

    def _wait_for(self, by: str, value: str):
        """Wait for the element located by ``(by, value)`` and return it."""
        wait = WebDriverWait(self.driver, SELENIUM_TIME_TO_WAIT_IN_SECONDS)
        # until() returns the condition's result, i.e. the located element,
        # so no second lookup is needed.
        return wait.until(
            expected_conditions.presence_of_element_located((by, value)))

    def find_element_by_tag_name(self, name: str):
        """Return the first element with tag ``name`` once it is present."""
        return self._wait_for(By.TAG_NAME, name)

    def find_element_by_id(self, id_: str):
        """Return the element with id ``id_`` once it is present."""
        return self._wait_for(By.ID, id_)

    def find_element_by_class_name(self, name: str):
        """Return the first element with class ``name`` once it is present."""
        return self._wait_for(By.CLASS_NAME, name)

    @property
    def current_url(self):
        """URL the browser is currently showing."""
        return self.driver.current_url
class ElementActions:
    """Fluent helper for acting on elements given ``"<kind>:<value>"`` locators.

    Supported kinds: ``cssselector``, ``xpath``, ``id``, ``name``, ``class``.
    """

    # Locator prefix -> name of the WebDriver finder method to call.
    _FINDER_BY_KIND = {
        'cssselector': 'find_element_by_css_selector',
        'xpath': 'find_element_by_xpath',
        'id': 'find_element_by_id',
        'name': 'find_element_by_name',
        'class': 'find_element_by_class_name',
    }

    def __init__(self, driver):
        """
        :param driver: WebDriver to act on. Previously this argument was
            ignored and a new browser was always started; a new
            ``WebDriver()`` is now created only when ``driver`` is None.
        """
        self.driver = driver if driver is not None else WebDriver()
        self.actions = ActionChains(self.driver)

    def __findElement(self, locator=''):
        """Resolve a ``"<kind>:<value>"`` locator to an element.

        :raises ValueError: if the separator is missing or the kind is not
            one of the supported prefixes
        """
        # Split on the FIRST colon only, so values that contain ':' (for
        # example ``a:hover`` or an XPath string literal) stay intact.
        kind, separator, value = locator.partition(":")
        finder_name = self._FINDER_BY_KIND.get(kind)
        if not separator or finder_name is None:
            raise ValueError(
                f"unsupported locator {locator!r}; expected '<kind>:<value>' "
                f"with kind in {sorted(self._FINDER_BY_KIND)}")
        return getattr(self.driver, finder_name)(value)

    def clickThis(self, locator=""):
        """Click the located element; returns self for chaining."""
        self.__findElement(locator).click()
        return self

    def enterText(self, locator="", value="testData"):
        """Type ``value`` into the located element; returns self."""
        self.__findElement(locator).send_keys(value)
        return self

    def getWebElement(self, locator=""):
        """Return the element for ``locator``."""
        return self.__findElement(locator)

    def hoverElement(self, locator=''):
        """Move the mouse over an element; returns self.

        ``locator`` may be a locator string (resolved first) or an already
        located element, which is passed through unchanged.
        """
        target = (self.__findElement(locator)
                  if isinstance(locator, str) else locator)
        self.actions.move_to_element(target).perform()
        return self
Beispiel #20
0
def cloud189(chrome: WebDriver, url="", password="",
             username="18953197117", login_password="Cb19985466"):
    """Open a share link, start the download and log in if asked to.

    :param chrome: browser session to drive
    :param url: share link to open; skipped when empty
    :param password: access code of the share; skipped when empty
    :param username: account name typed into the login frame
    :param login_password: account password typed into the login frame

    SECURITY NOTE(review): the defaults for ``username`` and
    ``login_password`` are real-looking credentials that used to be
    hard-coded in the body. They are kept only for backward compatibility
    and should move out of the source (environment/config), and the
    account password should be rotated.

    The first two steps retry forever (every 0.5 s) until they succeed;
    the login step is attempted at most four times.
    """
    if url:
        chrome.get(url)

    # Enter the share's access code, retrying until the form is usable.
    if password:
        while True:
            try:
                sleep(0.5)
                chrome.find_element_by_id("code_txt").send_keys(password)
                chrome.find_element_by_partial_link_text("访问").click()
                break
            except Exception:
                continue

    # Hover over the download button, then click it.
    while True:
        try:
            sleep(0.5)
            download_button = chrome.find_element_by_class_name("btn-download")
            ActionChains(chrome).move_to_element(download_button).perform()
            download_button.click()
            break
        except Exception:
            continue

    # A login frame may appear after the click; give it four chances.
    for _ in range(4):
        try:
            sleep(0.5)
            chrome.switch_to.frame("udb_login")
            chrome.find_element_by_xpath(
                '''//*[@id="userName"]''').send_keys(username)
            sleep(1)
            chrome.find_element_by_xpath(
                '''//*[@id="password"]''').send_keys(login_password)
            sleep(1)
            chrome.find_element_by_id("j-login").click()
            break
        except Exception:
            continue
Beispiel #21
0
def safely_find_class(driver: WebDriver, value, sleepTime: float = 0.2):
    """Return the first element with class ``value``, retrying until found.

    After every WebDriverException the function sleeps ``sleepTime``
    seconds and tries again; it never gives up.

    The retry is a loop, not recursion, so a long wait can no longer end
    in RecursionError.
    """
    while True:
        try:
            return driver.find_element_by_class_name(value)
        except WebDriverException:
            sleep(sleepTime)
Beispiel #22
0
        return [row_anchor.get_attribute('href')]
    return []


# Site to crawl.
url = 'http://www.apothekenindex.at'
# Class names; presumably the odd/even rows of a listing table -- not used
# in the visible part of the script.
products = ['productListing-odd', 'productListing-even']
# Empty result table; its columns name the fields collected per entry.
results = pd.DataFrame(
    columns=['Name', 'Address', 'Tel', 'Fax', 'Email', 'Schedule', 'Link'])
options = Options()
options.page_load_strategy = 'eager'
# options.set_headless(False)
driver = WebDriver(ChromeDriverManager().install(), options=options)
driver.get(url)

# step 1) get all states links
table = driver.find_element_by_class_name('infoBoxContents')
states = table.find_elements_by_class_name('parent')
states_links = []

for state in states:
    state_list = state.find_element_by_tag_name('ul')
    state_anchors = state_list.find_elements_by_tag_name('a')
    # Keep only the anchors whose title attribute is empty.
    states_links += [a.get_attribute('href')
                     for a in state_anchors if a.get_attribute('title') == '']

# Step 2) get all pharmacies links
pharmacies_links = []
for state_link in states_links:
    page = state_link
    has_next = True
    while has_next is True:
import time
from selenium.webdriver.chrome.webdriver import WebDriver

# NOTE(review): the driver path, login ID and password below are hard-coded;
# they should come from configuration, not from the source file.
chromedriver_path = (
    "/Users/magic/PycharmProjects/zywa-spider-xiaociwei/plug/chromedriver/mac/chromedriver"
)
driver = WebDriver(executable_path=chromedriver_path)
driver.get("http://202.110.217.69:7001/hsp/logonDialog_113.jsp")

# Fill the two login fields (presumably user name and password), pausing
# half a second after each.
for element_id, text in (("yhmInput", "371083198706245037"),
                         ("mmInput", "2078")):
    driver.find_element_by_id(element_id).send_keys(text)
    time.sleep(0.5)

# The verification code is typed by the operator on stdin.
print('等待用户输入验证码')
yymCode = input()
driver.find_element_by_id("validatecodevalue1").send_keys(yymCode)

driver.find_element_by_class_name("logonBtn").click()
time.sleep(5)
# Click the element with id C1001.
driver.find_element_by_id("C1001").click()
Beispiel #24
0
class Scrap:
    """Collects search results from mercadolibre.com.co through Chrome."""

    def __init__(self):
        """Start Chrome (maximized, incognito) and open the home page."""
        # creating configurations to the driver
        options = Options()
        options.add_argument('start-maximized')
        options.add_argument('--incognito')

        # Collected articles, stored as plain dicts.
        self.l_articulos = []
        self.driver = WebDriver(executable_path=os.path.join(
            BASE_DIR, 'Driver', 'chromedriver'),
                                options=options)
        self.driver.get('https://www.mercadolibre.com.co/')

    def page(self):
        """Scrape the current result page and follow the "next" links.

        Each page is scraped with :meth:`run` before its "next" link is
        clicked. Paging stops when the next link's href contains "#".
        If anything fails (for example there is no "pagination__next"
        element) the page currently shown is scraped once.

        NOTE(review): when the loop stops because of a "#" href, the page
        reached last is not passed to :meth:`run` -- confirm whether the
        final page is meant to be collected.
        """
        try:
            li = self.driver.find_element_by_class_name("pagination__next")
            next_link = li.find_element_by_tag_name('a').get_attribute('href')
            while next_link != "#":
                time.sleep(1)
                self.run()
                self.driver.find_element_by_class_name(
                    'pagination__next').click()

                li = self.driver.find_element_by_class_name("pagination__next")
                next_link = li.find_element_by_tag_name('a').get_attribute(
                    'href')
                if "#" in next_link:
                    break

        except Exception:
            # Was a bare ``except:``, which also intercepted Ctrl-C and
            # SystemExit and then kept scraping.
            self.run()

    def run(self):
        """Append every ``<li>`` of "searchResults" to ``self.l_articulos``.

        Each entry is the ``__dict__`` of an ``Articulo`` built from the
        item's title, price fraction and link.
        """
        info = self.driver.find_element_by_id('searchResults')
        ac = info.find_elements_by_tag_name('li')
        for a in ac:
            self.l_articulos.append(
                Articulo(
                    a.find_element_by_class_name('main-title').text,
                    a.find_element_by_class_name('price__fraction').text,
                    a.find_element_by_tag_name('a').get_attribute(
                        'href')).__dict__)

    def generar_articulos(self, string, min='0', max='0'):
        """Search for ``string`` and collect all result pages.

        ``min`` and ``max`` are accepted for compatibility but are not
        used: the price-filter inputs are not filled in, only the price
        form's button is clicked. Results end up in ``self.l_articulos``.
        The browser is always closed and quit at the end, after a ten
        second pause.
        """
        try:
            search = self.driver.find_element_by_name('as_word')
            search.send_keys(string)
            button = self.driver.find_element_by_class_name('nav-icon-search')
            button.click()
            button_r = self.driver.find_element_by_xpath(
                '//*[@id="priceForm"]/div/button')
            button_r.click()
            time.sleep(5)
            self.page()
        finally:
            time.sleep(10)
            self.driver.close()
            self.driver.quit()
Beispiel #25
0
def run_selenium(driver: WebDriver, user, passwd, id):
    """
    Log ``user`` in at ``address``, select the Tokyo region and hand over.

    Steps:
      1. submit the user id and wait up to 30 s for an "error-header";
         if one appears the attempt is treated as failed;
      2. submit the password;
      3. wait up to 180 s for the browser to be back on ``address``;
      4. switch the region to Tokyo when the header shows another one;
      5. call ``switch_to_frame_execute`` and return its result.

    Clicks blocked by an overlay are retried after ``check_dialog``. Any
    other WebDriverException during steps 1-3 clears the cookies and
    restarts the whole procedure.

    :param driver: browser session to drive
    :param user: login id; also used as prefix of every log line
    :param passwd: login password
    :param id: passed through to ``switch_to_frame_execute``
    :return: whatever ``switch_to_frame_execute`` returns
    """
    driver.get(address)
    print(f"{user}: start get /shell")
    try:
        # Step 1: user id form.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, "login-form__realm-user-id-row")))
                user_form = driver.find_element_by_class_name(
                    "login-form__realm-user-id-row")
                user_form.find_element_by_id("userid").send_keys(user)
                user_form.find_element_by_tag_name("button").click()
                print(f"{user}: send username")
                try:
                    # An error header within 30 s means the attempt was
                    # rejected; a timeout is the success path here.
                    WebDriverWait(driver, 30).until(
                        EC.presence_of_element_located(
                            (By.CLASS_NAME, "error-header")))
                    print(
                        f'{user}: err={driver.find_element_by_class_name("error-header").get_attribute("innerHTML")}'
                    )
                    raise WebDriverException(f"{user} tor ip invalidate")
                except TimeoutException:
                    # The proxy teardown that used to follow is disabled.
                    print(f"{user}: pass ip validate,start clear proxy")
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException):
                check_dialog(driver, user)
                continue
        # Step 2: password form.
        while True:
            try:
                WebDriverWait(driver, 180).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR,
                         "[class='login-form__password-row ']")))
                passwd_form = driver.find_element_by_class_name(
                    "login-form__password-row ")
                passwd_input = passwd_form.find_element_by_id("password")
                passwd_input.clear()
                passwd_input.send_keys(passwd)
                print(user + ": send passwd")
                # The second matching button is the one that is clicked.
                driver.find_elements_by_css_selector(
                    '[class="login-form__button bx--btn bx--btn--primary"]'
                )[1].click()
                print(user + ": click login")
                break
            except (ElementClickInterceptedException,
                    ElementNotInteractableException) as e:
                print(f'{user}:do login failed,msg= {e.msg}')
                check_dialog(driver, user)
                continue
        # Step 3: poll every 2 s, for at most 180 s, until redirected back.
        s_time = time.time()
        while True:
            if address in driver.current_url:
                print(f"{user} enter shell success")
                break
            else:
                c_time = time.time()
                if c_time - s_time > 180:
                    print(
                        f"{user}: enter shell too long,re enter,curentUrl={driver.current_url}"
                    )
                    raise WebDriverException(
                        f"{user}: userid or passwd error,relogin")
                sleep(2)
    except WebDriverException as e:
        print(f"{user}: login failed msg={e.msg}")
        driver.delete_all_cookies()
        # Return the retry's result. Without ``return`` the result was
        # dropped and this frame went on to repeat the region switch and
        # the hand-off after the retry had already done both.
        return run_selenium(driver, user, passwd, id)

    # Step 4: change to tokyo
    while True:
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, "bx--header__global")))
            header = driver.find_element_by_class_name("bx--header__global")
            if header.find_element_by_class_name(
                    "header__location-name").text != "Tokyo":
                print(f"{user}: region is not tokyo,begin to switch")
                header.find_element_by_css_selector(
                    "[class='header__location-change-button bx--btn bx--btn--ghost']"
                ).click()
                selects = safelyFindId(driver, "selectRegion")
                selects.click()
                selects.find_element_by_css_selector(
                    "[value='jp-tok']").click()
                safelyFindCSS(driver,
                              "[class='bx--btn bx--btn--primary']").click()
            break
        except (ElementClickInterceptedException,
                ElementNotInteractableException):
            check_dialog(driver, user)
            continue
        except WebDriverException:
            # Anything else: reload the page and try again.
            print(user + ": enter shell too long,refresh page")
            driver.refresh()
    # Step 5: the second argument is the part of ``user`` before "@" with
    # dots replaced by underscores.
    return switch_to_frame_execute(driver,
                                   str(user).split("@")[0].replace('.', "_"),
                                   id)
Beispiel #26
0
class Pimper:
    def __init__(self,
                 src,
                 dest=None,
                 unknown=None,
                 chromedriver_location=None,
                 proxy_server=None,
                 fast_proxy=False):
        """Prepare folders, the title log and two Chrome sessions.

        Args:
            src: folder containing the images to sort.
            dest: folder that receives the sorted images. When omitted, a
                ``sorted_images`` folder next to the running script is used
                (and created if necessary).
            unknown: folder for images whose source was not found. When
                omitted, an ``unknown`` folder inside the destination.
            chromedriver_location: path to the chromedriver executable. When
                omitted, ``chromedriver_win32/chromedriver.exe`` next to the
                running script.
            proxy_server: ``host:port`` of the HTTP/SSL proxy used by the
                second browser; a built-in default is used when omitted.
            fast_proxy: when true, the extra pauses around proxied page
                reads are skipped by the ``find_on_*`` methods.

        Side effects: changes the process working directory to the
        destination folder, appends a timestamp to the title log file and
        starts two Chrome browsers. Exits the process with status 1 when
        ``dest`` or ``src`` does not exist.
        """
        script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))

        if chromedriver_location is None:
            # Raw string: "\c" is not a valid escape sequence.
            self.chromedriver_location = (
                script_dir + r"\chromedriver_win32\chromedriver.exe")
        else:
            self.chromedriver_location = chromedriver_location
        if debug:
            print("Chrome location:", self.chromedriver_location)
            print("src:", src)
            print("dest:", dest)

        # NOTE(review): the log path is hard-coded to the author's machine.
        self.f = open(r'C:\Python34\Projects\pimp-my-collection\text.txt', 'a')
        self.f.write('\n' + str(datetime.today()) + '\n')
        self.titles = []

        # Where the sorted images go.
        if dest is None:
            self.dest = script_dir
            try:
                os.chdir(self.dest)
                os.mkdir("sorted_images")
            except OSError:
                if debug:
                    print("dest folder already exists")
            self.dest = self.dest + r"\sorted_images"
            os.chdir(self.dest)
        else:
            self.dest = dest
            try:
                os.chdir(self.dest)
            except FileNotFoundError:
                print("No such directory:", self.dest)
                self.f.close()
                sys.exit(1)

        # Folder for images without a source. The relative mkdir below works
        # because the working directory is the destination folder by now.
        if unknown is None:
            try:
                os.mkdir("unknown")
            except OSError:
                if debug:
                    print("unkn folder already exists")
            self.unknown = self.dest + r"\unknown"
        else:
            self.unknown = unknown
            try:
                os.mkdir(self.unknown)
            except OSError:
                if debug:
                    print("unknown folder already exists")

        if debug:
            print("dest:", self.dest)
            print("unknown:", self.unknown)

        # Where the images are taken from.
        self.folder = src
        try:
            self.images = os.listdir(path=self.folder)
        except FileNotFoundError:
            print("No such directory:", self.folder)
            self.f.close()
            sys.exit(1)

        if debug:
            for i in self.images:
                try:
                    print(i)
                except UnicodeEncodeError:
                    i = i.encode('ascii', 'ignore')
                    print("bad unicode:", i)

        # Pauses (seconds) between page actions and the explicit-wait limit.
        self.sleep_time = 3
        self.proxy_sleep_time = 3
        self.waiting_time = 15
        self.fast_proxy = fast_proxy

        # The proxy address goes stale over time; update the default when it
        # stops working.
        from selenium.webdriver import Proxy
        if proxy_server is None:
            proxy_server = "163.172.175.210:3128"  #https://free-proxy-list.net/
        settings = {"httpProxy": proxy_server, "sslProxy": proxy_server}
        self.proxy_server = Proxy(settings)

        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        cap = DesiredCapabilities.CHROME.copy()
        cap['platform'] = "WINDOWS"
        cap['version'] = "10"

        self.driver = None
        try:
            # Direct browser: created before the proxy is added to ``cap``.
            self.driver = ChromeDriver(
                desired_capabilities=cap,
                executable_path=self.chromedriver_location)

            # Proxied browser: same capabilities plus the proxy settings.
            self.proxy_server.add_to_capabilities(cap)
            self.driver2 = ChromeDriver(
                desired_capabilities=cap,
                executable_path=self.chromedriver_location)
        except Exception:
            # Do not leave a browser or the log file behind on a failed start.
            if self.driver is not None:
                self.driver.quit()
            self.f.close()
            raise

    def find_on_yandere(self):
        try:
            source = self.driver.find_element_by_class_name(
                'tag-type-copyright')
        except NoSuchElementException:
            if debug:
                print("no source")
            return None

        if debug:
            print(source)
            print(source.text)

        source2 = source.find_elements_by_css_selector('a')

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print(source2[1].text)

        return source2[1].text

    def find_on_sankaku(self, addr):
        try:
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if not self.fast_proxy:
                if debug:
                    print("no element")
                self.driver2.get(addr)
                sleep(self.proxy_sleep_time)

                try:
                    source = self.driver2.find_element_by_class_name(
                        'tag-type-copyright')
                    sleep(self.proxy_sleep_time)
                except NoSuchElementException:
                    if debug:
                        print("actually no element")
                    return None
            else:
                return None

        if debug:
            print(source)
            print(source.text)

        if not self.fast_proxy:
            sleep(self.proxy_sleep_time)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            sleep(self.proxy_sleep_time)

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print(source2[0].text)

        return source2[0].text

    def find_on_eshuushuu(self):
        got_source = False
        source = self.driver.find_elements_by_class_name('quicktag')
        check = self.driver.find_elements_by_tag_name('dt')

        if debug:
            for i in source:
                it = i.text
                try:
                    print(it)
                    print(i.get_attribute('span'))
                except UnicodeEncodeError:
                    it = it.encode('ascii', 'ignore')
                    print("bad unicode:", it)

            print(check)
            print("possible source:",
                  source[1].text[1:len(source[1].text) - 1])

        for i in check:
            if debug:
                print(i.text)
            if i.text.find("Source") != -1:
                return source[1].text[1:len(source[1].text) - 1]

        return None

    def find_on_danbooru(self, addr):
        try:
            source = self.driver2.find_element_by_class_name('category-3')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            source = self.driver2.find_element_by_class_name('category-3')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if not self.fast_proxy:
                if debug:
                    print("no element")
                self.driver2.get(addr)
                sleep(self.proxy_sleep_time)

                try:
                    source = self.driver2.find_element_by_class_name(
                        'category-3')
                    sleep(self.proxy_sleep_time)
                except NoSuchElementException:
                    if debug:
                        print("actually no element")
                    return None
            else:
                return None

        if debug:
            print(source)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out source 2")
            sleep(self.proxy_sleep_time)

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print("source:", source2[1].text)

        return source2[1].text

    def find_on_gelbooru(self, addr):
        try:
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if not self.fast_proxy:
                if debug:
                    print("no element")
                self.driver2.get(addr)
                sleep(self.proxy_sleep_time)

                try:
                    source = self.driver2.find_element_by_class_name(
                        'tag-type-copyright')
                    sleep(self.proxy_sleep_time)
                except NoSuchElementException:
                    if debug:
                        print("actually no element")
                    return None
            else:
                return None

        if debug:
            print(source)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out source 2")
            sleep(self.proxy_sleep_time)

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print("source:", source2[1].text)

        return source2[1].text

    def move_image(self, folder_name):
        img = (self.img_name[1:len(self.img_name)]).encode('ascii', 'ignore')
        #Сурс не нашелся
        if folder_name is None:
            dest = (self.unknown).encode('ascii', 'ignore')
            try:
                if debug:
                    print("src:", self.folder + self.img_name)
                    print("dst:", dest)
                shutil.copy(self.folder + self.img_name, self.unknown)
                os.remove(self.folder + self.img_name)
                print("image", img, "successfully moved in", dest)
            except:
                print("Error while moving image", img)
        #Сурс найден
        else:
            dest = (self.dest + r'\n'[:-1] + folder_name).encode(
                'ascii', 'ignore')
            #Убираем запрещенные символы для имени папки
            forbidden_symbols = re.findall('[*|\:"<>?/]', folder_name)
            for symb in forbidden_symbols:
                if debug:
                    print(symb)
                folder_name = folder_name.replace(symb, "").lower()
            if debug:
                print("new folder name:", folder_name)
            print("writing...")
            if (folder_name not in self.titles):
                try:
                    self.f.write(folder_name + '\n')
                    self.titles.append(folder_name)
                except UnicodeEncodeError:
                    pass
            try:
                os.mkdir(folder_name)
            except OSError:
                if debug:
                    print("folder", folder_name, "already exists")
                pass
            try:
                shutil.copy(self.folder + self.img_name, folder_name)
                os.remove(self.folder + self.img_name)
                print("image", img, "successfully moved in", dest)
            except OSError:
                print("Error while moving image", img)
            sleep(self.sleep_time)

    # Site priority: decide which of the matched sites is queried.
    def sort_addresses(self, pic_addr):
        variants = self.driver.find_element_by_id(
            'pages').find_elements_by_tag_name('td')
        if debug:
            print("find %")
            for i in variants:
                try:
                    print(i.text)
                except UnicodeEncodeError:
                    new_i = i.text.encode('ascii', 'ignore')
                    print("bad unicode:", new_i)
            for addr in pic_addr:
                addr2 = addr.get_attribute('href')
                print("trying", addr2)
            print("1st variant:", variants[6].text, "len =", len(variants),
                  "len var = ", len(variants[6].text))

        #Второе найденное similarity
        if len(variants[6].text) == 0:
            pos = 9
        else:
            pos = 10
        priority = 6
        best_addr = pic_addr[0].get_attribute('href')
        if (best_addr.find("danbooru")) != -1:
            if debug:
                print("danbooru[0]")
            priority = 3
        elif (best_addr.find("sankaku")) != -1:
            if debug:
                print("sankaku[0]")
            priority = 4
        elif (best_addr.find("gelbooru")) != -1:
            if debug:
                print("gelbooru[0]")
            priority = 5
        elif (best_addr.find("shuushuu")) != -1:
            if debug:
                print("shuushuu[0]")
            priority = 2
        elif (best_addr.find("yande")) != -1:
            if debug:
                print("yandere[0]")
            priority = 1

        if priority > 1:
            for addr in pic_addr[1:len(pic_addr)]:
                addr2 = addr.get_attribute('href')

                if pos > len(variants):
                    break
                similarity = int(re.search('\d+', variants[pos].text).group())
                if debug:
                    print("similarity =", similarity)
                #if similarity >= 70:
                if (addr2.find("danbooru")) != -1:
                    if debug:
                        print("danbooru", priority)
                    if priority > 3:
                        best_addr = addr2
                        priority = 3
                elif (addr2.find("sankaku")) != -1:
                    if debug:
                        print("sankaku", priority)
                    if priority > 4:
                        best_addr = addr2
                        priority = 4
                elif (addr2.find("gelbooru")) != -1:
                    if debug:
                        print("gelbooru", priority)
                    if priority > 5:
                        best_addr = addr2
                        priority = 5
                elif (addr2.find("shuushuu")) != -1:
                    if debug:
                        print("shuushuu", priority)
                    if priority > 2:
                        best_addr = addr2
                        priority = 2
                        break
                elif (addr2.find("yande")) != -1:
                    if debug:
                        print("yandere", priority)
                    if priority > 1:
                        best_addr = addr2
                        priority = 1
                        break

                pos += 4  #Следующее similarity

        if debug:
            print("best_addr:", best_addr)
        return best_addr, priority

    def search_for_source(self, pic_addr):
        best_addr, priority = self.sort_addresses(pic_addr)
        folder_name = None

        if debug:
            print("trying", best_addr)

        if priority == 1:
            print("searching on yandere")
            try:
                self.driver.get(best_addr)
            except WebDriverException as inst:
                if debug:
                    print(inst)
                exit(1)
            folder_name = self.find_on_yandere()
        elif priority == 4:
            print("searching on sankaku")
            try:
                self.driver2.get(best_addr)
                sleep(self.proxy_sleep_time)
            except TimeoutException:
                if debug:
                    print("time out in if")
                sleep(self.proxy_sleep_time)
            except WebDriverException as inst:
                if debug:
                    print(inst)
                exit(1)
            folder_name = self.find_on_sankaku(best_addr)
        elif priority == 2:
            print("searching on e-shuushuu")
            try:
                self.driver.get(best_addr)
            except WebDriverException as inst:
                if debug:
                    print(inst)
                exit(1)
            folder_name = self.find_on_eshuushuu()
        elif priority == 3:
            print("searching on danbooru")
            try:
                self.driver2.get(best_addr)
                sleep(self.proxy_sleep_time)
            except TimeoutException:
                if debug:
                    print("time out in if")
                sleep(self.proxy_sleep_time)
            except WebDriverException as inst:
                if debug:
                    print(inst)
                exit(1)
            folder_name = self.find_on_danbooru(best_addr)
        elif priority == 5:
            print("searching on gelbooru")
            try:
                self.driver2.get(best_addr)
                sleep(self.proxy_sleep_time)
            except TimeoutException:
                if debug:
                    print("time out in if")
                sleep(self.proxy_sleep_time)
            except WebDriverException as inst:
                if debug:
                    print(inst)
                exit(1)
            folder_name = self.find_on_gelbooru(best_addr)

        if folder_name is None:
            print("No relevant match for", self.img_name[1:len(self.img_name)])
        self.move_image(folder_name)

    def iqdb_actions(self):
        for image in self.images:

            print("\nprocessing",
                  self.images.index(image) + 1, "of", len(self.images))

            self.img_name = r'\n'[:-1] + image

            if debug:
                try:
                    print(self.folder + self.img_name)
                except UnicodeEncodeError:
                    print("bad unicode")
            sleep(self.sleep_time)

            if ((image[len(image) - 4:] != ".jpg")
                    and (image[len(image) - 4:] != ".png")
                    and (image[len(image) - 5:] != ".jpeg")):
                try:
                    print("Unsupported format:", image)
                except UnicodeEncodeError:
                    image = image.encode('ascii', 'ignore')
                    print(image)
            else:
                self.driver.get("http://iqdb.org/")
                #Вставляем изображение
                element = ui.WebDriverWait(
                    self.driver, self.waiting_time).until(
                        lambda driver: self.driver.find_element_by_id("file"))

                if debug:
                    print(element)

                element.send_keys(self.folder + self.img_name)

                #Сабмитим
                element = ui.WebDriverWait(
                    self.driver, self.waiting_time).until(
                        lambda driver: self.driver.find_element_by_xpath(
                            "//input[@value='submit']"))

                if debug:
                    print(element)

                try:
                    element.click()
                except TimeoutException:
                    sleep(self.sleep_time)

                sleep(self.sleep_time)

                #Ищем лучшее совпадение
                try:
                    pic_addr = ui.WebDriverWait(
                        self.driver, self.waiting_time).until(
                            lambda driver: self.driver.
                            find_elements_by_css_selector('.image a'))
                except TimeoutException:
                    print("Image", image, "is to o large")
                    self.move_image(None)
                else:
                    if debug:
                        print(pic_addr)

                    matches = ui.WebDriverWait(
                        self.driver, self.waiting_time
                    ).until(lambda driver: self.driver.find_element_by_xpath(
                        '//*[@id="pages"]/div[2]/table/tbody/tr[1]/th'))

                    if debug:
                        print("matches:", matches)
                        print(matches.text)

                    if (matches.text.find("No")) != -1:
                        print(matches.text, "for", image)
                        self.move_image(None)
                    else:
                        self.search_for_source(pic_addr)

                    sleep(self.sleep_time)

    def pimp(self):
        if debug:
            print("proxy mode:", self.fast_proxy)
        try:
            self.iqdb_actions()
        except KeyboardInterrupt:
            print("Stop working...")
        finally:
            self.driver.quit()
            self.driver2.quit()
            self.f.close()
            print("Job's done")
Beispiel #27
0
import math
from selenium.webdriver.chrome.webdriver import WebDriver

# Text of the link to follow: pi to the power e, times 10000, rounded up.
string = str(math.ceil(math.pow(math.pi, math.e) * 10000))

con = WebDriver()
con.get("http://suninjuly.github.io/find_link_text")
con.find_element_by_partial_link_text(string).click()

# Form fields in fill order: (locator method, locator value, text to type).
form_fields = (
    (con.find_element_by_tag_name, "input", "Ivan"),
    (con.find_element_by_name, "last_name", "Petrov"),
    (con.find_element_by_class_name, "city", "Smolensk"),
    (con.find_element_by_id, "country", "Russia"),
)
for locate, target, text in form_fields:
    locate(target).send_keys(text)

# Submit the form.
con.find_element_by_css_selector("button.btn").click()