import logging
import os
from urllib import parse

from lxml import etree
from pyquery import PyQuery as pq

from utils import prepare_logs_dir

# Prepare everything the logging setup below needs before it is configured
# (presumably this creates the "logs" directory -- see utils.prepare_logs_dir).
prepare_logs_dir()

logging.basicConfig(
    filename='logs/parser.log',
    level=logging.INFO,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%d-%m-%y %H:%M',
)


class ExoneParser:
    """Parser for the Exone website."""

    # Directory that contains this module (symlinks resolved).
    CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

    # XML export configuration: the export directory lives next to this
    # module, and OUTPUT_FILENAME is the name of the exported file.
    OUTPUT_DIR = "parsed_xml"
    OUTPUT_FILENAME = "vacancies.xml"
    DIR_TO_EXPORT = os.path.join(CURRENT_DIR, OUTPUT_DIR)

    # Maximum number of attempts to obtain a response in case the server
    # responded with an error.
    ATTEMPTS_COUNT = 3
from unittest.mock import patch

from utils import prepare_logs_dir

# Fixture and log locations, all resolved relative to this test module.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

TEST_DATA_DIR_NAME = 'data'
TEST_DATA_DIR = os.path.join(CURRENT_DIR, TEST_DATA_DIR_NAME)

LOGS_DIR = 'logs'
LOGS_PATH = os.path.join(CURRENT_DIR, LOGS_DIR)

# HTML fixtures: a vacancy list page and a single vacancy page.
LIST_PAGE_FILENAME = 'test_list.html'
LIST_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, LIST_PAGE_FILENAME)

VACANCY_PAGE_FILENAME = 'test_vacancy.html'
VACANCY_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, VACANCY_PAGE_FILENAME)

# Runs at import time, before any test case is defined or executed.
prepare_logs_dir(LOGS_PATH)


class ParserTestCase(unittest.TestCase):
    """Tests for the parser."""

    # A single sample vacancy record.
    TEST_LIST = [
        {
            'vacancy_type': 'Vollzeit',
            'vacancy_location': 'Giengen-Sachsenhausen',
            'vacancy_title': 'Elektronik-Montierer/in',
            'vacancy_url': (
                'https://www.exone.de/jm/web/tool/jobmanager/'
                'apply.php?sttyp=1&arst=detail&id=85'
            ),
            'vacancy_id': '85',
        },
    ]
# NOTE(review): import order is kept exactly as found; grequests presumably
# has to be imported before requests (gevent patching) -- confirm before
# regrouping these lines.
import grequests as grq
import requests as rq
import urllib3
from datetime import datetime
from urllib import parse
from lxml import etree
from pyquery import PyQuery as pq
from fake_useragent import UserAgent
from utils import prepare_logs_dir

# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Prepare the log location before logging is configured to write into it.
prepare_logs_dir()

logging.basicConfig(
    filename='logs/parser.log',
    level=logging.INFO,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%d-%m-%y %H:%M',
)


def progress(count, total, status=''):
    """
    Console progress bar

    :param count: amount of work completed so far
    :param total: total amount of work; used as the divisor, so it must
        not be zero
    :param status: optional text (not consumed by the statements visible
        here)
    """
    # Width of the bar in characters.
    bar_len = 50

    # Number of "filled" cells, proportional to count / total.
    filled_len = int(round(bar_len * count / float(total)))

    # Completion percentage, rounded to one decimal place.
    percents = round(100.0 * count / float(total), 1)

    # '=' for the completed part, '-' for the remainder.
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    # NOTE(review): nothing visible here consumes `percents`, `bar` or
    # `status`; the rest of this function appears to lie outside the reviewed
    # excerpt -- confirm against the full file.
from utils import prepare_logs_dir

# Fixture and log locations, all resolved relative to this test module.
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))

TEST_DATA_DIR_NAME = 'data'
TEST_DATA_DIR = os.path.join(CURRENT_DIR, TEST_DATA_DIR_NAME)

LOGS_DIR = 'logs'
LOGS_PATH = os.path.join(CURRENT_DIR, LOGS_DIR)

# Fixtures: a JSON vacancy list and an HTML vacancy page.
LIST_PAGE_FILENAME = 'vacancy_list.json'
LIST_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, LIST_PAGE_FILENAME)

VACANCY_PAGE_FILENAME = 'test_vacancy.html'
VACANCY_PAGE_FILEPATH = os.path.join(TEST_DATA_DIR, VACANCY_PAGE_FILENAME)

# Runs at import time, before any test case is defined or executed.
prepare_logs_dir(LOGS_PATH)


def get_test_data():
    """
    Load test data from file

    Reads the JSON fixture at LIST_PAGE_FILEPATH.

    :return: the parsed JSON test data, or None (after printing a message)
        when the fixture file does not exist
    """
    if not os.path.exists(LIST_PAGE_FILEPATH):
        print("File with test data not found")
        return None

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(LIST_PAGE_FILEPATH, encoding='utf-8') as f:
        return json.load(f)


class ParserTestCase(unittest.TestCase):
    """Tests for the parser."""
    # NOTE(review): no body of this class is visible in the reviewed excerpt;
    # only the docstring was added here.