Example #1
0
def write_list_to_file(result_phrases_list):
    """Append every phrase of the list to the result file.

    Each item of ``result_phrases_list`` is an iterable of strings. Its
    parts are joined with single spaces and the joined text is passed to
    ``write_phrase_to_log`` in append mode, using ``FILE_ENCODING`` and
    ``RESULT_FILE_PATH``. One call is made per item, in list order.
    """
    # Lazy generator: each phrase is joined right before it is written.
    joined_phrases = (" ".join(words) for words in result_phrases_list)

    for line in joined_phrases:
        write_phrase_to_log(
            phrase=line,
            write_mode="a",
            enc=FILE_ENCODING,
            full_path_to_file=RESULT_FILE_PATH,
        )
Example #2
0
def parse_phrase_bunch(phrases):
    """Process one bunch of phrases, retrying with a new driver until it works.

    One attempt does the following:
      1. obtains a driver via ``get_driver()`` and logs in with
         ``handle_login``;
      2. opens the geo tool page;
      3. appends the e-mail returned by ``handle_login`` to ``email_log.txt``;
      4. hands the phrases to ``handle_phrases``;
      5. overwrites ``index_log.txt`` with the current ``phrase_counter`` and
         then advances the counter by ``PHRASE_BUNCH_SIZE``.

    If any step raises, the problem is printed, the driver (if one was
    created) is closed, and the whole attempt is repeated.

    The retry is a loop rather than a recursive call, so a long series of
    failures cannot end in ``RecursionError``. The driver is only closed when
    it was actually created, so a failing ``get_driver()`` no longer turns
    into a ``NameError`` inside the handler.

    :param phrases: the phrases of the current bunch, passed through to
        ``handle_phrases``.
    """
    global phrase_counter

    while True:
        driver = None  # Lets the handler tell "never created" from "created".
        try:
            driver = get_driver()

            used_email = handle_login(driver)

            driver.get("https://tools.pixelplus.ru/tools/geo")

            email_log = get_log_path("email_log.txt")
            # Argument order: phrase, write_mode, enc, full_path_to_file.
            write_phrase_to_log(used_email, 'a', ENC, email_log)

            handle_phrases(phrases, driver)

            index_log = get_log_path("index_log.txt")
            write_phrase_to_log(phrase_counter, 'w', ENC, index_log)
            phrase_counter += PHRASE_BUNCH_SIZE
            return

        except Exception as e:
            print("Проблема ^^^")
            print(e)
            if driver is not None:
                driver.quit()
            # Fall through to the next loop iteration: retry the bunch.
Example #3
0
def write_upstairs(a_string):
    """Write the phrases derived from ``a_string`` to the result file as a "staircase".

    The variants returned by ``get_variants_for_string`` are split into
    phrases and minus-words. For every phrase, the phrase is repeated 2, 3,
    ... up to 7 times, trimmed by ``cut_words_to_limit``, wrapped in double
    quotes, followed by the space-joined minus-words, and appended to
    ``RESULT_FILE_PATH``. The staircase for a phrase stops early as soon as
    ``cut_words_to_limit`` signals that the loop must be broken.
    """
    phrases, minus_words = separate_phrases_and_minus_words(
        get_variants_for_string(a_string))
    minus_tail = " ".join(minus_words)

    for phrase in phrases:
        # Wordstat accepts at most 7 words; single-word entries are not
        # needed, so the repetition count starts at two.
        for repeat_count in range(2, 8):
            must_stop, words = cut_words_to_limit([phrase] * repeat_count)

            line = '{symb}{phr}{symb} {minus}'.format(
                symb='"', phr=" ".join(words), minus=minus_tail)
            write_phrase_to_log(phrase=line,
                                write_mode='a',
                                enc=FILE_ENCODING,
                                full_path_to_file=RESULT_FILE_PATH)

            if must_stop:
                break
Example #4
0
def write_table_open_tag(site, region):
    """Select the result file for ``site``/``region`` and start its HTML table.

    Rebinds the module-level ``RESULT_FILE`` to
    ``<current dir>/<PARSING_PATH_PARTICLE>/Result/<site>_<region>_result.html``
    and writes the opening ``<html>``/``<table>`` tags to it in ``'w'`` mode,
    so any previous content of that file is replaced.

    :param site: value substituted for the domain part of the file name.
    :param region: value substituted for the region part of the file name.
    """
    global RESULT_FILE

    base_dir = get_current_dir()
    report_name = 'Result/{domain}_{region}_result.html'.format(
        domain=site, region=region)
    RESULT_FILE = os.path.join(base_dir, PARSING_PATH_PARTICLE, report_name)

    opening_tags = "<html>\n<table>\n"
    write_phrase_to_log(opening_tags,
                        write_mode='w',
                        enc=WRITE_ENCODING,
                        full_path_to_file=RESULT_FILE)
Example #5
0
def write_log_header():
    """Append the opening HTML markup (head, body and table tags) to ``LOG_FILE``.

    The markup is written exactly as laid out in the literal below,
    including its leading newline and indentation.
    """
    opening_markup = """
    <html>
            <head>
                <meta charset="utf-8">
            </head>
                <body>
            <table>			
    """
    write_phrase_to_log(opening_markup, "a", WRITE_ENCODING, LOG_FILE)
Example #6
0
def parse_url(driver, url):
    """Request the keyword report for ``url`` and download it as CSV.

    One attempt opens ``MEGAINDEX_KEYWORDS_URL``, submits ``url`` through the
    search form and then either:

    * logs "nothing found" to ``LOG_FILE`` and returns, when the page shows
      the "Ничего не найдено" cell; or
    * triggers the CSV export, waits until ``count_files()`` grows, renames
      the downloaded file and logs success to ``LOG_FILE``.

    Any exception raised during an attempt is printed and the attempt is
    repeated from the start. The retry is a loop, not a recursive call, so
    repeated failures cannot exhaust the interpreter's recursion limit.

    :param driver: the browser driver used for all page interactions.
    :param url: the URL typed into the search form and written to the log.
    """
    while True:
        try:  # One attempt is treated as a transaction.
            driver.get(MEGAINDEX_KEYWORDS_URL)
            init_number_of_files = count_files()

            url_input = driver.find_element_by_xpath('//input[@name="url"]')
            url_input.clear()
            url_input.send_keys(url)

            # The first <button> on the page is the search button.
            search_button = driver.find_element_by_tag_name('button')
            search_button.click()

            # Element holding the "Ничего не найдено" text, if present.
            nothing_found = None
            try:
                nothing_found = driver.find_element_by_xpath(
                    '//td[contains(text(), "Ничего не найдено")]')
            except NoSuchElementException:
                pass  # No such cell: there are results to export.

            if nothing_found:
                write_phrase_to_log(
                    "{}DELIMITER {} ничего не найдено.".format(url, PROJECT),
                    'a', 'utf-8', LOG_FILE)
                return

            # "Export to CSV" button.
            export_to_csv_button = driver.find_element_by_xpath(
                '//input[@type="button"]')
            export_to_csv_button.click()

            confirm_button = driver.find_element_by_xpath(
                '//span[text()="Скачать файл"]')
            confirm_button.click()

            # Poll once per second until a new file shows up.
            while count_files() <= init_number_of_files:
                sleep(1)
            rename_downloaded_file()

            write_phrase_to_log(
                "{}DELIMITER {} успешно.".format(url, PROJECT),
                'a', 'utf-8', LOG_FILE)
            return

        except Exception as e:
            print(e)
            # Fall through to the next loop iteration: retry this URL.
Example #7
0
def create_pixel_plus_account():
    """Fill in the PixelPlus registration form and record the e-mail used.

    One attempt obtains a driver and a mail account, opens the site, follows
    the login and registration links and types the mail account into the
    name and e-mail fields and a fixed password into the password field.
    The form is only filled in here; it is not submitted by this function.

    If no mail account is available the program exits via ``quit()``.
    Any other failure is printed and the attempt is repeated. Once an attempt
    completes, the mail account is written to the used-emails log.

    Fixes compared with the previous version:

    * the handler catches ``Exception`` instead of everything, so the
      ``SystemExit`` raised by ``quit()`` is no longer swallowed (previously
      an empty mail pool led to endless retries instead of an exit);
    * retries are a loop rather than recursion, so the final log write
      happens exactly once, for the attempt that completed, and never runs
      with an unbound ``mail_account``.
    """
    while True:
        try:
            driver = get_driver(USE_PROXY)
            mail_account = get_mail_account()

            if not mail_account:
                quit()  # Raises SystemExit, which is deliberately not caught.

            driver.get("https://tools.pixelplus.ru/#internal-optimization")
            login_button = driver.find_element_by_link_text('Войти')
            login_button.click()

            register_button = driver.find_element_by_link_text(
                'Зарегистрироваться.')
            register_button.click()

            name_field = driver.find_element_by_id("input-name")
            name_field.send_keys(mail_account)

            email_field = driver.find_element_by_id("input-email")
            email_field.send_keys(mail_account)

            password_field = driver.find_element_by_id("input-password")
            password_field.send_keys("goskomstat")
            sleep(2)

        except Exception as e:
            print(e)
            continue  # Start a new attempt.

        break

    write_phrase_to_log(
        mail_account,
        "/home/michael/PycharmProjects/PixelPlus/log/used_emails.txt")
Example #8
0
def handle_link_list(link_list):
    """Enrich every row with data parsed from its link and append it to the result file.

    For each row, the link stored at index ``LINK_COL`` is passed to
    ``get_data_from_competitor``. The eight values it returns (title,
    keywords, descriptions, h1-h4 headings and alts) are appended to the row
    in place. The row is then converted with ``convert_list_into_csv_line``
    and appended to ``RESULT_FILE``. Rows whose link could not be parsed are
    reported with ``print`` and skipped.

    :param link_list: iterable of mutable rows (lists); each row is extended
        in place.
    """
    for row in link_list:
        link = row[LINK_COL]

        try:
            (title, keywords, descriptions,
             h1_s, h2_s, h3_s, h4_s, alts) = get_data_from_competitor(link)
        except Exception as err:  # Could not parse this competitor.
            print(err)
            continue

        row.extend(
            (title, keywords, descriptions, h1_s, h2_s, h3_s, h4_s, alts))

        write_phrase_to_log(phrase=convert_list_into_csv_line(row),
                            write_mode='a',
                            enc=WRITE_FILE_ENCODING,
                            full_path_to_file=RESULT_FILE)
Example #9
0
def handle_chunks(drv, phrases):
    """Submit the phrases chunk by chunk and append each result table to ``LOG_FILE``.

    The phrases are split with ``get_chunks_generator``. For every chunk the
    textarea is filled, a ``<tr><th>N</th></tr>`` row with the chunk number is
    logged, and the submit button is clicked until ``get_results`` returns
    without a timeout, stale-element or missing-element error. The returned
    HTML is then appended to ``LOG_FILE`` and the textarea is cleared.

    When ``UnexpectedAlertPresentException`` is raised (per the original
    author's note: the limits have run out), the current chunk and all
    remaining ones are written to ``future.txt`` in ``LOG_DIR``, the driver
    is closed and the program exits.

    :param drv: the browser driver.
    :param phrases: the phrases to be split into chunks.
    """
    pending = list(get_chunks_generator(phrases))

    processed = 0  # Only needed for debugging.
    while pending:
        current = pending.pop(0)
        textarea = fill_phrases(drv, current)

        write_phrase_to_log('<tr><th>{}</th></tr>'.format(processed), "a",
                            WRITE_ENCODING, LOG_FILE)

        while True:
            submit_button_click(drv)
            try:
                table_html = get_results(drv)
            except (TimeoutException, StaleElementReferenceException,
                    NoSuchElementException) as err:
                print(err)
                continue  # Click the submit button again.
            except UnexpectedAlertPresentException:
                # Limits exhausted: save what has not been parsed yet.
                pending.append(current)
                leftovers = "\n".join(chain(*pending))
                remainder = os.path.join(LOG_DIR, "future.txt")
                write_phrase_to_log(leftovers, "w", WRITE_ENCODING, remainder)
                drv.quit()
                quit()

            write_phrase_to_log(table_html, "a", WRITE_ENCODING, LOG_FILE)
            break

        processed += 1

        try:
            textarea.clear()
        except NoSuchElementException as err:
            # Seen once in practice; look the textarea up again and retry.
            textarea = drv.find_element_by_tag_name("textarea")
            textarea.clear()
            print(err)

    print("Counter {}".format(processed))
Example #10
0
def write_table_closing_tag():
    """Append the closing ``</table>`` and ``</html>`` tags to ``RESULT_FILE``."""
    closing_tags = "</table>\n</html>"
    write_phrase_to_log(
        closing_tags,
        write_mode='a',
        enc=WRITE_ENCODING,
        full_path_to_file=RESULT_FILE,
    )
Example #11
0
def handle_phrase(phrase):
    """Search for ``phrase`` and log links, highlighted words and related items.

    One attempt opens a new driver, loads the Yandex region-tuning page,
    calls ``change_city``, then for each of ``PAGES_TO_PARSE`` result pages:

    * sends the phrase to the search (first page only);
    * writes the collected links to ``RESULT_FILE``;
    * writes the highlighted words to ``<SELECTED_REGION>_highlighted.csv``;
    * if ``PARSE_RELATED_WORDS`` is set, writes the related items to
      ``<SELECTED_REGION>_related_items.csv``;
    * moves on to the next page.

    Finally the phrase is appended to ``<SELECTED_REGION>_last_phrase.txt``
    and the driver is closed.

    If anything raises, the error is printed, the driver (if one was
    created) is closed and the attempt is repeated by the enclosing loop.

    The previous version also called itself from the handler. Once that
    nested call succeeded, the outer loop ran yet another full attempt, so
    every failure caused the phrase to be parsed and logged twice. The loop
    alone now does the retrying. The driver is also only closed if it was
    actually created.

    :param phrase: the search phrase to process.
    """
    while True:
        driver = None  # Lets the handler tell "never created" from "created".
        try:  # One attempt is treated as a transaction.
            driver = get_driver()
            driver.get(
                "https://yandex.ru/tune/geo/?retpath=https%3A%2F%2Fwww.yandex.ru%2F%3Fdomredir%3D1%26text%3D%25D0%25BA%25D1%2583%25D0%25BF%25D0%25B8%25D1%2582%25D1%258C%2520%25D0%25BA%25D0%25BE%25D0%25BC%25D0%25BF%25D1%258C%25D1%258E%25D1%2582%25D0%25B5%25D1%2580%26lr%3D213%26domredir%3D1&nosync=1"
            )
            change_city(driver)

            # The log paths do not depend on the page, so build them once.
            full_path_to_highlited_words_file = os.path.join(
                LOGS_DIR, "{}_highlighted.csv".format(SELECTED_REGION))
            full_path_to_related_items_file = os.path.join(
                LOGS_DIR, "{}_related_items.csv".format(SELECTED_REGION))

            for i in range(PAGES_TO_PARSE):
                if i == 0:
                    send_phrase_to_search(driver, phrase)

                sleep(3)
                parsed_links_tmp = collect_links(driver)

                print("parsed_links")
                parsed_links = prepare_csv(phrase, parsed_links_tmp)
                write_list_to_file(parsed_links, WRITE_ENCODING, RESULT_FILE)

                print("highlited_words")
                highlited_words_tmp = get_highlighted_words(driver)
                highlited_words = prepare_csv(phrase, highlited_words_tmp)
                write_list_to_file(highlited_words, WRITE_ENCODING,
                                   full_path_to_highlited_words_file)

                if PARSE_RELATED_WORDS:
                    print("tmp_related_item")
                    tmp_related_item_list = collect_related_items(driver)
                    related_item_list = prepare_csv(phrase,
                                                    tmp_related_item_list)

                    print("related_items")
                    write_list_to_file(related_item_list, WRITE_ENCODING,
                                       full_path_to_related_items_file)

                print("go_to_next_page")
                go_to_next_page(driver)

            log_file = os.path.join(
                LOGS_DIR, "{}_last_phrase.txt".format(SELECTED_REGION))
            write_phrase_to_log(phrase=phrase,
                                write_mode='a',
                                enc=WRITE_ENCODING,
                                full_path_to_file=log_file)
            driver.quit()
            break
        except Exception as e:
            print(e)
            if driver is not None:
                driver.quit()
            # The enclosing loop starts the next attempt.
Example #12
0
def write_log_footer():
    """Append the closing table, body and html tags to ``LOG_FILE``."""
    closing_markup = "</table></body></html>"
    write_phrase_to_log(closing_markup, "a", WRITE_ENCODING, LOG_FILE)
Example #13
0
def create_csv_titles():
    """Append the semicolon-separated column header line to ``RESULT_FILE``."""
    columns = (
        "Query", "Url", "title", "keywords", "descriptions",
        "h1_s", "h2_s", "h3_s", "h4_s", "alts",
    )
    header_line = ";".join(columns)
    write_phrase_to_log(
        phrase=header_line,
        write_mode='a',
        enc=WRITE_FILE_ENCODING,
        full_path_to_file=RESULT_FILE,
    )
def open_yandex_to_register_acc():
    """Register Yandex accounts in a loop while ``all_phones`` is non-empty.

    Every iteration opens a new browser (stored in the module-level
    ``chrome``), loads the registration page, fills in random first and last
    names, a generated login, ``PASSWORD`` and a phone number, requests the
    confirmation code and submits it. A phone number is reused for
    subsequent accounts until a limit check reports it as exhausted or the
    code is rejected; then the next number is popped from ``all_phones``.
    On success ``login;phone`` is appended to ``LOG_FILE``.

    Fixes compared with the previous version:

    * when the first-name field is missing, the proxy is blacklisted and the
      iteration is restarted; previously execution fell through and used an
      unbound (or stale, from the previous iteration) ``first_name_element``;
    * the browser is closed on every path that restarts the loop, not only
      after a successful registration, so abandoned attempts no longer leave
      browser instances behind.
    """
    global chrome

    current_phone = None
    while all_phones:
        chrome = get_chrome()
        url = "https://passport.yandex.ru/registration"
        chrome.get(url)
        try:
            first_name_element = chrome.find_element_by_id("firstname")
        except NoSuchElementException:
            # The form did not load: blacklist the proxy and start over.
            send_proxy_to_black_set()
            chrome.quit()
            continue

        first_name_element.send_keys(get_random_string(string_length=10))

        last_name_element = chrome.find_element_by_id("lastname")
        last_name_element.send_keys(get_random_string(string_length=10))

        login_element = chrome.find_element_by_id("login")
        login = generate_unique_login()
        login_element.send_keys(login)

        password_element = chrome.find_element_by_id("password")
        password_element.send_keys(PASSWORD)

        password_confirm_element = chrome.find_element_by_id(
            "password_confirm")
        password_confirm_element.send_keys(PASSWORD)

        phone_element = chrome.find_element_by_id("phone")

        # Keep using the current number until it is marked as exhausted.
        if not current_phone:
            current_phone = all_phones.pop()
        phone_number = current_phone

        phone_element.send_keys(phone_number)

        # The first character is dropped (a leading "+", per the variable
        # name).
        phone_number_without_plus = phone_number[1:]

        button_get_code = get_code_button()

        try:
            button_get_code.click()
        except WebDriverException as e:
            # Seen in practice: the button "is not clickable at point ..."
            # because another element (the phone input) would receive the
            # click. Look the button up again and retry once.
            print(e)
            button_get_code = get_code_button()
            button_get_code.click()

        limit_for_phone_reached = check_limit_reached(
            phone_number_without_plus)

        if limit_for_phone_reached:
            current_phone = None  # Do not use this phone number any more.
            chrome.quit()
            continue

        confirmation_code = get_confirmation_code(phone_number_without_plus)

        if not confirmation_code:
            # Probably the captcha was not passed. The program has to be
            # stopped here (breakpoint) so the captcha can be solved by hand
            # before the code is requested again.
            confirmation_code = get_confirmation_code(
                phone_number_without_plus)

        success = try_code(confirmation_code)

        if not success:
            # Called as in the original; its result is not used here.
            after_code_input_check_limit_reached(phone_number_without_plus)
            current_phone = None  # Do not use this phone number any more.
            chrome.quit()
            continue

        print("Success: {}".format(login))
        write_phrase_to_log("{};{}".format(login, current_phone), "a",
                            WRITE_ENCODING, LOG_FILE)
        chrome.quit()