예제 #1
0
def init_urls_to_parse(path_to_raw_file):
    """Ask for the project name and fill the module-level URL list.

    The answer typed by the user is stored in the global ``PROJECT``.
    ``URLS_TO_PARSE`` is set to the list read from ``URLS_FILE``; when that
    list is empty (falsy), it is built from ``path_to_raw_file`` with
    ``create_urls_to_parse`` instead.
    """
    global PROJECT, URLS_TO_PARSE

    PROJECT = input("Введите название проекта: ")

    # Prefer the previously saved list; only build a new one when it is empty.
    URLS_TO_PARSE = (
        get_list(URLS_FILE, 'utf-8')
        or create_urls_to_parse(path_to_raw_file)
    )
예제 #2
0
def get_current_email(infinite_loop=True):
    """Return the next e-mail to use from ``global_emails``.

    The entry following the one reported by ``get_last_used_email()`` is
    chosen; when no last e-mail is reported, the first entry is chosen.
    Entries for which ``is_excluded()`` is truthy are skipped by moving
    forward through the list.

    Args:
        infinite_loop: When the last used e-mail was the final entry, a
            truthy value reloads ``global_emails`` from
            ``email_list_full_path`` and retries; a falsy value prints a
            message and terminates the program.

    Returns:
        The selected entry of ``global_emails``.

    Raises:
        ValueError: If the last used e-mail is not present in
            ``global_emails`` (raised by ``list.index``).
        IndexError: If ``global_emails`` is empty, or if skipping excluded
            entries runs past the end of the list.
        SystemExit: If the list is exhausted and ``infinite_loop`` is falsy.
    """
    # Local import keeps the block self-contained; sys.exit() replaces the
    # interactive-only exit() helper injected by the site module.
    import sys

    global global_emails
    last_email = get_last_used_email()

    first_free_email = 0

    if last_email:
        first_free_email = global_emails.index(last_email) + 1

        try:
            current_email = global_emails[first_free_email]
        except IndexError:
            if infinite_loop:
                # NOTE(review): the retry only terminates if the reloaded list
                # or get_last_used_email() differs from this call - confirm.
                global_emails = get_list(email_list_full_path)
                return get_current_email()
            print("Емейлов не осталось.")
            sys.exit()
    else:
        current_email = global_emails[0]

    # Walk forward until an entry that is not excluded is found.
    # NOTE(review): running off the end here raises IndexError instead of
    # following the infinite_loop policy above - presumably unintended.
    while is_excluded(current_email):
        first_free_email += 1
        current_email = global_emails[first_free_email]

    return current_email
예제 #3
0
def get_initial_info():
    """Read ``URL_AND_KEYS_FILE`` and split it into site, region and phrases.

    Returns:
        A ``(site, region, phrases)`` tuple as produced by
        ``separate_url_and_region_and_phrases``.
    """
    raw_entries = get_list(URL_AND_KEYS_FILE, encoding=READ_ENCODING)
    # Unpacking enforces that the helper yields exactly three values.
    site, region, phrases = separate_url_and_region_and_phrases(raw_entries)
    return (site, region, phrases)
예제 #4
0
def initialize():
    """Read the keys file from the init directory.

    The loaded list is not used or returned by this function yet.
    """
    keys_path = os.path.join(INIT_DIR, KEYS_FILE)
    keys = get_list(keys_path, READ_ENCODING)
예제 #5
0
# Module-level lists; both are replaced by the contents of the init files below.
global_phrases = []
global_emails = []

# Relative location of the parsing project and of its init directory.
PARSING_PATH_PARTICLE = "../CommerceParsing/"
INIT_PATH_PARTICLE = PARSING_PATH_PARTICLE + "Init/"

PHRASE_BUNCH_SIZE = 100
INDEX_LOG_FILE = "index_log.txt"
ENC = 'utf-8'

# Input files, resolved against the directory reported by get_current_dir().
word_list_full_path = os.path.join(
    get_current_dir(), INIT_PATH_PARTICLE, "word_list.txt")
email_list_full_path = os.path.join(
    get_current_dir(), INIT_PATH_PARTICLE, "email_list.txt")

# Phrases are loaded first, then e-mails.
global_phrases = get_list(word_list_full_path, ENC)
global_emails = get_list(email_list_full_path, ENC)

def get_log_path(log):
    """Return the path of ``log`` inside the parsing project's ``log`` directory.

    The path is built from ``get_current_dir()``, ``PARSING_PATH_PARTICLE``,
    the literal directory name ``"log"`` and the given file name.
    """
    return os.path.join(get_current_dir(), PARSING_PATH_PARTICLE, "log", log)

def get_phrase_counter_val():
    index_log_file = get_log_path(INDEX_LOG_FILE)
    try:
        with open(index_log_file, 'r') as f:
            line = f.readline()
            if line:
                phrase_counter = int(line)
예제 #6
0
import urllib
from general.general import get_list, get_current_date, add_phrase_in_log
from time import sleep
from general.drv import get_driver

import random
import os

# Root directory of the Bukvarix project (absolute, machine-specific path).
BUKVARIX_DIR = "/home/michael/PycharmProjects/Bukvarix/"
BUNCH_SIZE = 100

current_date = get_current_date()

# Phrases come from the init directory; the log file is named after the date.
phrase_list = get_list(
    os.path.join(BUKVARIX_DIR, "Init/word_list.txt"))
log_file = os.path.join(
    BUKVARIX_DIR, "Log/{}.txt".format(current_date))

import requests


def pop_bunch():
    """Remove up to 100 phrases from the front of ``phrase_list``.

    Each removed phrase is appended, followed by a newline, to a local
    string. ``phrase_list`` is mutated in place.

    NOTE(review): the visible body never returns or stores ``phrase_bunch``;
    the function looks cut off at this point - confirm against the full
    source before relying on its return value.
    """
    global phrase_list
    counter = 0

    phrase_bunch = ""

    # NOTE(review): the literal 100 matches BUNCH_SIZE defined in this module;
    # presumably both are meant to be the same limit - confirm.
    while phrase_list and counter < 100:
        current_phrase = "{}\n".format(phrase_list.pop(0))
        phrase_bunch += current_phrase
        counter += 1
예제 #7
0
def get_phrases():
    """Load the phrase list from the YandexParsing init directory.

    The file path is built from ``PROJECT_DIR`` and the relative location
    ``../YandexParsing/init/phrases.txt`` and read with ``READ_ENCODING``.

    Returns:
        Whatever ``get_list`` returns for that file.
    """
    phrases_path = os.path.join(
        PROJECT_DIR, "../YandexParsing/init/phrases.txt")
    return get_list(phrases_path, READ_ENCODING)
예제 #8
0
from general.general import get_list, get_current_dir
from general.drv import get_driver
from general.drv import USE_PROXY
import os
from selenium.common.exceptions import NoSuchElementException
from general.drv import send_proxy_to_black_list

# The proxy list lives in the CommerceParsing init directory, resolved
# against the directory reported by get_current_dir().
full_path_to_proxy_list = os.path.join(
    get_current_dir(),
    "../CommerceParsing/Init/proxy_list.txt",
)

# Loaded once at import time; test_proxies() runs one check per entry.
init_proxy_list = get_list(full_path_to_proxy_list)


def test_proxies():
    """Open ip-api.com once per entry of ``init_proxy_list`` and print the result.

    For every entry a new driver is obtained from ``get_driver()``, the page
    is loaded, and the text of the element with id ``qr`` (split on commas)
    is printed together with the text of the "Country" table cell. When the
    country cell is missing, the proxy is passed to
    ``send_proxy_to_black_list`` and nothing is printed for it.

    Raises:
        NoSuchElementException: If the ``qr`` element is missing; the driver
            is still closed before the exception propagates.
    """
    # NOTE(review): the original passed an undefined name ``a_proxy`` to
    # send_proxy_to_black_list. Binding it to the loop entry assumes the n-th
    # get_driver() call uses the n-th proxy of init_proxy_list - confirm
    # against general.drv.
    for a_proxy in init_proxy_list:
        driver = get_driver()
        try:
            driver.get("http://ip-api.com/")

            ip = driver.find_element_by_id("qr").text.split(",")

            try:
                region = driver.find_element_by_xpath(
                    '//th[text()="Country"]/following-sibling::td').text
            except NoSuchElementException:
                send_proxy_to_black_list(a_proxy)
                # Skip printing: there is no region for this proxy, and a
                # value left over from an earlier iteration must not be used.
                continue
        finally:
            # Always release the browser, including on errors and on skip.
            driver.quit()

        print("{}, {}".format(ip, region))
예제 #9
0
def keyword_stuffing():
    """Read ``init.txt`` from ``INIT_DIR``, run ``init`` on it and parse the phrases.

    The whole file content is handed to ``init``; every entry from the third
    one onward is then handed to ``parse_all``.
    """
    init_path = os.path.join(INIT_DIR, "init.txt")
    init_lines = get_list(init_path, READ_ENCODING)

    init(init_lines)

    # The slice is taken after init() has run, exactly as before.
    parse_all(init_lines[2:])