def write_list_to_file(result_phrases_list):
    """Append every phrase of *result_phrases_list* to the result file.

    Args:
        result_phrases_list: Iterable of phrases. Each phrase is an iterable
            of strings; its items are joined with single spaces and the
            joined text is written with one ``write_phrase_to_log`` call.
    """
    for words in result_phrases_list:
        write_phrase_to_log(
            phrase=" ".join(words),
            write_mode="a",
            enc=FILE_ENCODING,
            full_path_to_file=RESULT_FILE_PATH,
        )
def parse_phrase_bunch(phrases):
    """Process one bunch of phrases, repeating the attempt until it succeeds.

    Every attempt opens a driver with ``get_driver()``, logs in through
    ``handle_login``, opens the geo tool page, appends the value returned by
    ``handle_login`` to ``email_log.txt``, hands the phrases to
    ``handle_phrases`` and finally overwrites ``index_log.txt`` with the
    current ``phrase_counter`` before advancing the counter by
    ``PHRASE_BUNCH_SIZE``.

    If anything raises, the error is printed, the driver (when one was
    created) is closed and the whole attempt is repeated.

    Args:
        phrases: The phrases passed through to ``handle_phrases``.
    """
    global phrase_counter

    # A loop replaces the former recursive retry so that a long series of
    # failures cannot exhaust the interpreter stack.
    while True:
        # Reset per attempt: get_driver() itself may be what fails, and the
        # handler must not touch a driver that was never created.
        driver = None
        try:
            driver = get_driver()
            used_email = handle_login(driver)
            driver.get("https://tools.pixelplus.ru/tools/geo")

            email_log = get_log_path("email_log.txt")
            # Arguments: phrase, write_mode, enc, full_path_to_file.
            write_phrase_to_log(used_email, 'a', ENC, email_log)

            handle_phrases(phrases, driver)

            index_log = get_log_path("index_log.txt")
            write_phrase_to_log(phrase_counter, 'w', ENC, index_log)
            phrase_counter += PHRASE_BUNCH_SIZE
            # NOTE(review): the driver is not closed on success here;
            # presumably handle_phrases or the caller does it - confirm.
            return
        except Exception as e:
            print("Проблема ^^^")
            print(e)
            if driver is not None:
                driver.quit()
def write_upstairs(a_string):
    """Write the phrases derived from *a_string* to the result file as a "staircase".

    The variants of *a_string* are split into phrases and minus words. For
    each phrase, lines with 2, 3, ... up to 7 repetitions of the phrase are
    produced, passed through ``cut_words_to_limit``, wrapped in double
    quotes, followed by the space-joined minus words, and appended to the
    result file. The staircase for a phrase stops early as soon as
    ``cut_words_to_limit`` reports that the loop should be broken.

    Args:
        a_string: Source string handed to ``get_variants_for_string``.
    """
    variants = get_variants_for_string(a_string)
    phrases, minus_words = separate_phrases_and_minus_words(variants)
    minus_part = " ".join(minus_words)

    for phrase in phrases:
        # Start at two repetitions: one-word entries are not needed.
        # Stop at seven: Wordstat accepts no more than 7 words.
        for repetitions in range(2, 8):
            stop_after_this, words = cut_words_to_limit([phrase] * repetitions)
            quoted_line = '{symb}{phr}{symb} {minus}'.format(
                symb='"', phr=" ".join(words), minus=minus_part)
            write_phrase_to_log(
                phrase=quoted_line,
                write_mode='a',
                enc=FILE_ENCODING,
                full_path_to_file=RESULT_FILE_PATH,
            )
            if stop_after_this:
                break
def write_table_open_tag(site, region):
    """Select the result file for *site* / *region* and start it with the opening tags.

    Rebinds the module-level ``RESULT_FILE`` to
    ``<current dir>/<PARSING_PATH_PARTICLE>/Result/<site>_<region>_result.html``
    and writes the opening ``<html>`` and ``<table>`` tags to it in ``'w'``
    mode.

    Args:
        site: Value substituted for ``{domain}`` in the file name.
        region: Value substituted for ``{region}`` in the file name.
    """
    global RESULT_FILE

    base_dir = get_current_dir()
    relative_name = 'Result/{domain}_{region}_result.html'.format(
        domain=site, region=region)
    RESULT_FILE = os.path.join(base_dir, PARSING_PATH_PARTICLE, relative_name)

    write_phrase_to_log(
        "<html>\n<table>\n",
        write_mode='w',
        enc=WRITE_ENCODING,
        full_path_to_file=RESULT_FILE,
    )
def write_log_header():
    """Append the opening HTML markup, up to and including ``<table>``, to the log file."""
    opening_markup = """ <html> <head> <meta charset="utf-8"> </head> <body> <table> """
    write_phrase_to_log(opening_markup, "a", WRITE_ENCODING, LOG_FILE)
def parse_url(driver, url):
    """Run the keyword export for *url*, repeating the attempt until it completes.

    One attempt is treated as a transaction: open ``MEGAINDEX_KEYWORDS_URL``,
    type *url* into the ``url`` input and press the search button. When the
    page shows the "nothing found" cell, that fact is appended to the log and
    the function returns. Otherwise the CSV export is triggered and confirmed,
    the function waits until ``count_files()`` grows, renames the downloaded
    file and appends a success line to the log.

    Any exception is printed and the attempt starts over.

    Args:
        driver: Selenium driver used for the whole interaction.
        url: The URL typed into the search form.
    """
    # A loop replaces the former recursive retry so that a long series of
    # failures cannot exhaust the interpreter stack.
    while True:
        try:
            driver.get(MEGAINDEX_KEYWORDS_URL)
            init_number_of_files = count_files()

            url_input = driver.find_element_by_xpath('//input[@name="url"]')
            url_input.clear()
            url_input.send_keys(url)

            # This is the search button.
            search_button = driver.find_element_by_tag_name('button')
            search_button.click()

            # Element holding the "nothing found" text; its absence is the
            # normal case, so a failed lookup is not an error.
            try:
                nothing_found = driver.find_element_by_xpath(
                    '//td[contains(text(), "Ничего не найдено")]')
            except NoSuchElementException:
                nothing_found = None

            if nothing_found:
                write_phrase_to_log(
                    "{}DELIMITER {} ничего не найдено.".format(url, PROJECT),
                    'a', 'utf-8', LOG_FILE)
                return

            # "Export to CSV" button.
            export_to_csv_button = driver.find_element_by_xpath(
                '//input[@type="button"]')
            export_to_csv_button.click()

            confirm_button = driver.find_element_by_xpath(
                '//span[text()="Скачать файл"]')
            confirm_button.click()

            # Poll once a second until a new file shows up.
            # NOTE(review): there is no timeout; if the download never
            # starts, this waits forever - confirm this is acceptable.
            while count_files() <= init_number_of_files:
                sleep(1)
            rename_downloaded_file()

            write_phrase_to_log(
                "{}DELIMITER {} успешно.".format(url, PROJECT),
                'a', 'utf-8', LOG_FILE)
            return
        except Exception as e:
            print(e)
def create_pixel_plus_account():
    """Open the PixelPlus registration form and fill it in with a new mail account.

    Each attempt opens a driver with ``get_driver(USE_PROXY)``, takes an
    account from ``get_mail_account()``, navigates to the registration form
    and types the account into the name and e-mail fields together with a
    fixed password. The form is not submitted here. A failed attempt is
    printed, its browser is closed and a new attempt starts. After every
    attempt that obtained a mail account, that account is recorded in the
    used-emails log.

    Raises:
        SystemExit: If ``get_mail_account()`` returns a falsy value.
    """
    # A loop replaces the former recursive retry so that a long series of
    # failures cannot exhaust the interpreter stack.
    while True:
        # Reset per attempt: either call below may be the one that fails,
        # and the code after the try must not read unbound names.
        driver = None
        mail_account = None
        try:
            driver = get_driver(USE_PROXY)
            mail_account = get_mail_account()
            if not mail_account:
                # SystemExit is not an Exception subclass, so the handler
                # below lets it through and the program really stops
                # (a bare ``except:`` used to swallow it and retry forever).
                driver.quit()
                raise SystemExit

            driver.get("https://tools.pixelplus.ru/#internal-optimization")
            login_button = driver.find_element_by_link_text('Войти')
            login_button.click()
            register_button = driver.find_element_by_link_text(
                'Зарегистрироваться.')
            register_button.click()

            # The mail account is used both as the name and as the e-mail.
            name_field = driver.find_element_by_id("input-name")
            name_field.send_keys(mail_account)
            email_field = driver.find_element_by_id("input-email")
            email_field.send_keys(mail_account)
            password_field = driver.find_element_by_id("input-password")
            password_field.send_keys("goskomstat")
            sleep(2)
            succeeded = True
        except Exception as e:
            print(e)
            succeeded = False
            # Do not leave a browser behind for every failed attempt.
            if driver is not None:
                driver.quit()

        if mail_account:
            # NOTE(review): other write_phrase_to_log calls take
            # (phrase, write_mode, enc, full_path_to_file); here the path is
            # the second positional argument - confirm against the signature.
            write_phrase_to_log(
                mail_account,
                "/home/michael/PycharmProjects/PixelPlus/log/used_emails.txt")

        if succeeded:
            return
def handle_link_list(link_list):
    """Extend every row with the competitor's page data and append it to the result file.

    For each row the link stored at ``LINK_COL`` is passed to
    ``get_data_from_competitor``. The eight returned values (title, keywords,
    descriptions, h1-h4 headings, alts) are appended to the row in place, the
    row is converted to a CSV line and written to the result file. Rows whose
    link could not be parsed are printed as errors and skipped.

    Args:
        link_list: Iterable of mutable rows (lists) indexable by ``LINK_COL``.
    """
    for row in link_list:
        link = row[LINK_COL]
        try:
            (title, keywords, descriptions,
             h1_s, h2_s, h3_s, h4_s, alts) = get_data_from_competitor(link)
        except Exception as e:
            # This competitor could not be parsed.
            print(e)
            continue

        row.extend(
            (title, keywords, descriptions, h1_s, h2_s, h3_s, h4_s, alts))
        write_phrase_to_log(
            phrase=convert_list_into_csv_line(row),
            write_mode='a',
            enc=WRITE_FILE_ENCODING,
            full_path_to_file=RESULT_FILE,
        )
def handle_chunks(drv, phrases):
    """Submit *phrases* chunk by chunk and append every result table to the log.

    The phrases are split with ``get_chunks_generator``. For each chunk the
    form is filled, a header row with the chunk counter is logged, and the
    submit button is clicked until ``get_results`` returns the table HTML,
    which is then appended to the log. Timeouts, stale elements and missing
    elements are printed and the submit is repeated. An unexpected alert is
    treated as "limits exhausted": the current and all remaining chunks are
    saved to ``future.txt`` in ``LOG_DIR``, the driver is closed and the
    program exits.

    Args:
        drv: Selenium driver used for the whole interaction.
        phrases: Phrases to split into chunks.
    """
    pending = list(get_chunks_generator(phrases))
    processed = 0  # Needed for debugging only.

    while pending:
        current = pending.pop(0)
        textarea = fill_phrases(drv, current)
        write_phrase_to_log('<tr><th>{}</th></tr>'.format(processed), "a",
                            WRITE_ENCODING, LOG_FILE)

        while True:
            submit_button_click(drv)
            try:
                table_html = get_results(drv)
            except (TimeoutException, StaleElementReferenceException,
                    NoSuchElementException) as e:
                # Repeat the submit click for the same chunk.
                print(e)
                continue
            except UnexpectedAlertPresentException:
                # Limits are used up: dump everything not yet parsed to a file.
                pending.append(current)
                leftover_text = "\n".join(list(chain.from_iterable(pending)))
                leftover_path = os.path.join(LOG_DIR, "future.txt")
                write_phrase_to_log(leftover_text, "w", WRITE_ENCODING,
                                    leftover_path)
                drv.quit()
                quit()
            write_phrase_to_log(table_html, "a", WRITE_ENCODING, LOG_FILE)
            break

        processed += 1
        try:
            textarea.clear()
        except NoSuchElementException as e:
            # This exception was seen once. If it shows up again, debug it.
            textarea = drv.find_element_by_tag_name("textarea")
            textarea.clear()
            print(e)
        print("Counter {}".format(processed))
def write_table_closing_tag():
    """Append the closing ``</table>`` and ``</html>`` tags to the result file."""
    closing_markup = "</table>\n</html>"
    write_phrase_to_log(
        closing_markup,
        write_mode='a',
        enc=WRITE_ENCODING,
        full_path_to_file=RESULT_FILE,
    )
def handle_phrase(phrase):
    """Parse the search results for *phrase*, repeating the attempt until it succeeds.

    One attempt is treated as a transaction: open a driver, load the region
    settings page, change the city, send the phrase to the search and walk
    through ``PAGES_TO_PARSE`` result pages. For every page the collected
    links are written to ``RESULT_FILE``, the highlighted words to
    ``<SELECTED_REGION>_highlighted.csv`` and, when ``PARSE_RELATED_WORDS`` is
    set, the related items to ``<SELECTED_REGION>_related_items.csv`` (both
    in ``LOGS_DIR``). After the last page the phrase is appended to
    ``<SELECTED_REGION>_last_phrase.txt`` and the driver is closed.

    On any exception the error is printed, the driver (when one was created)
    is closed and the attempt starts over.

    Args:
        phrase: The search phrase to process.
    """
    while True:
        # Reset per attempt: get_driver() itself may be what fails, and the
        # handler must not touch a driver that was never created.
        driver = None
        try:
            driver = get_driver()
            driver.get(
                "https://yandex.ru/tune/geo/?retpath=https%3A%2F%2Fwww.yandex.ru%2F%3Fdomredir%3D1%26text%3D%25D0%25BA%25D1%2583%25D0%25BF%25D0%25B8%25D1%2582%25D1%258C%2520%25D0%25BA%25D0%25BE%25D0%25BC%25D0%25BF%25D1%258C%25D1%258E%25D1%2582%25D0%25B5%25D1%2580%26lr%3D213%26domredir%3D1&nosync=1"
            )
            change_city(driver)

            for i in range(PAGES_TO_PARSE):
                if i == 0:
                    send_phrase_to_search(driver, phrase)
                sleep(3)

                parsed_links_tmp = collect_links(driver)
                print("parsed_links")
                parsed_links = prepare_csv(phrase, parsed_links_tmp)
                write_list_to_file(parsed_links, WRITE_ENCODING, RESULT_FILE)

                print("highlited_words")
                highlited_words_log_file = "{}_highlighted.csv".format(
                    SELECTED_REGION)
                highlited_words_tmp = get_highlighted_words(driver)
                highlited_words = prepare_csv(phrase, highlited_words_tmp)
                full_path_to_highlited_words_file = os.path.join(
                    LOGS_DIR, highlited_words_log_file)
                write_list_to_file(highlited_words, WRITE_ENCODING,
                                   full_path_to_highlited_words_file)

                if PARSE_RELATED_WORDS:
                    print("tmp_related_item")
                    tmp_related_item_list = collect_related_items(driver)
                    related_item_list = prepare_csv(phrase,
                                                    tmp_related_item_list)
                    print("related_items")
                    related_items_log_file = "{}_related_items.csv".format(
                        SELECTED_REGION)
                    full_path_to_log_file = os.path.join(
                        LOGS_DIR, related_items_log_file)
                    write_list_to_file(related_item_list, WRITE_ENCODING,
                                       full_path_to_log_file)

                print("go_to_next_page")
                go_to_next_page(driver)

            log_file = os.path.join(
                LOGS_DIR, "{}_last_phrase.txt".format(SELECTED_REGION))
            write_phrase_to_log(phrase=phrase,
                                write_mode='a',
                                enc=WRITE_ENCODING,
                                full_path_to_file=log_file)
            driver.quit()
            break
        except Exception as e:
            print(e)
            if driver is not None:
                driver.quit()
            # The enclosing loop performs the retry. The former recursive
            # call here made the loop run once more after the retry had
            # already succeeded, so the phrase was parsed and written twice.
def write_log_footer():
    """Append the closing ``</table></body></html>`` markup to the log file."""
    closing_markup = "</table></body></html>"
    write_phrase_to_log(closing_markup, "a", WRITE_ENCODING, LOG_FILE)
def create_csv_titles():
    """Append the semicolon-separated column header line to the result file."""
    header_line = "Query;Url;title;keywords;descriptions;h1_s;h2_s;h3_s;h4_s;alts"
    write_phrase_to_log(
        phrase=header_line,
        write_mode='a',
        enc=WRITE_FILE_ENCODING,
        full_path_to_file=RESULT_FILE,
    )
def open_yandex_to_register_acc():
    """Register Yandex accounts in a loop while there is a phone number to use.

    Every pass opens a new browser (stored in the module-level ``chrome``),
    fills the registration form with random first/last names, a generated
    login and ``PASSWORD``, enters a phone number and requests a confirmation
    code. The same phone number is reused on following passes until its limit
    is reported as reached or a code is rejected; then the next number is
    popped from ``all_phones``. On success ``login;phone`` is appended to the
    log file and the browser is closed.
    """
    global chrome
    current_phone = None

    # Also continue while the number in use is still good: otherwise the
    # last number popped from all_phones would be dropped after a single
    # pass, before its limit is reached.
    while all_phones or current_phone:
        chrome = get_chrome()
        url = "https://passport.yandex.ru/registration"
        chrome.get(url)

        try:
            first_name_element = chrome.find_element_by_id("firstname")
        except NoSuchElementException:
            send_proxy_to_black_set()
            # The form did not load, so there is nothing to fill in on this
            # pass; start over instead of using an unbound or stale element.
            chrome.quit()
            continue

        first_name_element.send_keys(get_random_string(string_length=10))
        last_name_element = chrome.find_element_by_id("lastname")
        last_name_element.send_keys(get_random_string(string_length=10))

        login_element = chrome.find_element_by_id("login")
        login = generate_unique_login()
        login_element.send_keys(login)

        password_element = chrome.find_element_by_id("password")
        password_element.send_keys(PASSWORD)
        password_confirm_element = chrome.find_element_by_id(
            "password_confirm")
        password_confirm_element.send_keys(PASSWORD)

        phone_element = chrome.find_element_by_id("phone")
        phone_number = current_phone or all_phones.pop()
        if not current_phone:
            current_phone = phone_number
        phone_element.send_keys(phone_number)
        phone_number_without_plus = phone_number[1:]

        button_get_code = get_code_button()
        try:
            button_get_code.click()
        except WebDriverException as e:
            # Seen in practice: "Element <button ...> is not clickable at
            # point (753, 528). Other element would receive the click:
            # <input type="tel" ... id="phone" ...>". Try once more.
            print(e)
            button_get_code = get_code_button()
            button_get_code.click()

        limit_for_phone_reached = check_limit_reached(
            phone_number_without_plus)
        if limit_for_phone_reached:
            current_phone = None  # Do not use the current phone number any more.
            chrome.quit()  # Do not leave this pass's browser behind.
            continue

        confirmation_code = get_confirmation_code(phone_number_without_plus)
        if not confirmation_code:
            # The captcha was probably not passed. The program has to be
            # paused and the captcha solved by hand, i.e. a breakpoint is
            # required here. After that the code is requested once more.
            confirmation_code = get_confirmation_code(
                phone_number_without_plus)

        success = try_code(confirmation_code)
        if not success:
            # The result is not used; the call is kept because it may have
            # effects of its own.
            after_code_input_check_limit_reached(phone_number_without_plus)
            current_phone = None  # Do not use the current phone number any more.
            chrome.quit()  # Do not leave this pass's browser behind.
            continue

        print("Success: {}".format(login))
        write_phrase_to_log("{};{}".format(login, current_phone), "a",
                            WRITE_ENCODING, LOG_FILE)
        chrome.quit()