コード例 #1
0
ファイル: vstup2017.py プロジェクト: ogroleg/ispyt
def parse_areas_of_study_and_write_to_database(input_arguments):
    """Collect areas of study with their courses and store them in the database.

    Every file returned by ``get_univ_files(input_arguments.path)`` is read
    as bytes and passed to ``get_area_course_info``, which yields a mapping
    of area title to an iterable of courses.  The per-file mappings are
    merged so that each course appears once per area, in first-seen order.
    The merged result is printed and then written to the ``areas`` and
    ``courses`` collections.

    Args:
        input_arguments: object exposing ``db_host``, ``db``, ``erase`` and
            ``path`` attributes.  When ``erase`` is truthy the ``areas`` and
            ``courses`` collections are dropped before anything is parsed.
    """
    db = connect_to_database(input_arguments.db_host, input_arguments.db)
    if input_arguments.erase:
        db.areas.drop()
        db.courses.drop()

    areas = {}
    for univ_file in get_univ_files(input_arguments.path):
        with open(univ_file, 'rb') as file:
            page = file.read()
        for area_title, courses in get_area_course_info(page).items():
            known_courses = areas.setdefault(area_title, [])
            for course in courses:
                # List membership (not a set) keeps first-seen order and
                # does not require courses to be hashable.
                if course not in known_courses:
                    known_courses.append(course)
    print(areas)

    areas_dao = []
    courses_dao = []
    for title, course_list in areas.items():
        # Compute the id once so the area and all of its courses share it.
        area_id = generate_id(title)
        areas_dao.append({'area_id_old': area_id, 'area_title': title})
        courses_dao.extend(
            {'area_id_old': area_id, 'course_title': course}
            for course in course_list)

    # insert_many rejects an empty document list, so skip the call when
    # nothing was parsed.
    if areas_dao:
        db.areas.insert_many(areas_dao)
    if courses_dao:
        db.courses.insert_many(courses_dao)
コード例 #2
0
ファイル: vstup2017.py プロジェクト: ogroleg/ispyt
def parse_univ_pages_and_write_to_database(input_arguments):
    """Parse saved university pages and store one document per university.

    Every file returned by ``get_univ_files(input_arguments.path)`` is read
    as bytes and parsed with ``get_univ_info_from_page_2017``.  The title is
    cleaned up, the location is derived from the address, and the numeric
    id is taken from the file name.  Documents are keyed by ``univ_id`` so a
    later file with the same id replaces an earlier one, and the result is
    written to the ``univs`` collection.

    Args:
        input_arguments: object exposing ``db_host``, ``db``, ``erase`` and
            ``path`` attributes.  When ``erase`` is truthy the ``univs``
            collection is dropped first.
    """
    db = connect_to_database(input_arguments.db_host, input_arguments.db)
    if input_arguments.erase:
        db.univs.drop()

    univs_to_insert = {}
    for univ_file in get_univ_files(input_arguments.path):
        with open(univ_file, 'rb') as file:
            page = file.read()
        univ = get_univ_info_from_page_2017(page)

        # NOTE: a title-based filter for colleges / technical schools used
        # to sit here; it is disabled, so such institutions are kept.

        # Drop parenthesised fragments, then collapse runs of spaces.
        # NOTE(review): strip() runs before the substitution, so a title
        # ending in "(...)" keeps a trailing space - left unchanged here in
        # case other data is matched against the stored titles; confirm.
        title = sub(r'\([^)]*\)', '', univ['univ_title'].strip())
        while '  ' in title:
            title = title.replace('  ', ' ')
        univ['univ_title'] = title

        univ['univ_location'] = get_city_name_from_address(
            univ['univ_address'])
        # The id is the text between the last 'i' in the path and the final
        # five characters (presumably a ".html" suffix - verify against the
        # actual file naming).
        univ['univ_id'] = int(univ_file[univ_file.rindex('i') + 1:-5])

        univs_to_insert[univ['univ_id']] = univ
        # Lightweight progress output every ten universities.
        if len(univs_to_insert) % 10 == 0:
            print(len(univs_to_insert))

    # insert_many rejects an empty document list; also hand it a real list
    # rather than a dict view.
    if univs_to_insert:
        db.univs.insert_many(list(univs_to_insert.values()))
コード例 #3
0
ファイル: init.py プロジェクト: Maestro3479/Laba-4
def _create_database():
    """Create the authors, books and users tables if they do not exist yet.

    Each ``CREATE TABLE IF NOT EXISTS`` statement is executed and committed
    separately, in the order authors, books, users.
    """
    # The statement text is kept exactly as-is, whitespace included.
    schema_statements = (
        '''
                CREATE TABLE IF NOT EXISTS Авторы(`id` INTEGER PRIMARY KEY,
                                                  `имя` TEXT,
                                                  `страна` TEXT,
                                                  `годы жизни` TEXT)
            ''',
        '''
                CREATE TABLE IF NOT EXISTS Книги(`id` INTEGER PRIMARY KEY,
                                                 `id автора` INTEGER,
                                                 `название` TEXT,
                                                 `количество страниц` INTEGER,
                                                 `издательство` TEXT,
                                                 `год издания` INTEGER,
                                                  FOREIGN KEY('id автора')
                                                  REFERENCES Авторы(id))
            ''',
        '''
                CREATE TABLE IF NOT EXISTS
                    Пользователи(`id` INTEGER PRIMARY KEY,
                                 `логин` TEXT,
                                 `пароль` TEXT)
            ''',
    )

    with connect_to_database() as db:
        cursor = db.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
            db.commit()
コード例 #4
0
ファイル: init.py プロジェクト: Maestro3479/Laba-4
def _fill_database():
    """Insert the built-in sample authors, books and the admin user.

    The seed data is kept as pipe-separated text tables, one row per line.
    Each field is stripped of surrounding whitespace and inserted with
    ``INSERT OR IGNORE``, so rows whose primary key already exists are left
    untouched.  One commit is issued per table.
    """
    authors = ("1|L.N.Tolstoi    |Russia |1828-1910\n"
               "2|F.M.Dostoyevsky|Russia |1821-1881\n"
               "3|B.Vian         |France |1920-1959\n"
               "4|A.Camus        |France |1913-1960\n"
               "5|F.Kafka        |Austria|1883-1924")

    books = ("1|1|War and Peace   |1225|The Russian Messanger|1869\n"
             "2|1|Resurrection    |483 |Niva                 |1899\n"
             "3|2|The Idiot       |678 |The Russian Messanger|1868\n"
             "4|2|The Gambler     |241 |The Moscow Renaisanse|1867\n"
             "5|3|The Foam of days|219 |Gallimard            |1947\n"
             "6|4|The Stranger    |159 |Hamish Hamilton      |1946\n"
             "7|4|The Rebel       |238 |Gallimard            |1951\n"
             "8|5|The Trial       |395 |Verlag Die Schmiede  |1925\n"
             "9|5|Amerika         |351 |Routledge            |1938")

    # id | login | stored password value (a 32-character hex string).
    users = "1|admin|d4d1c9e67f05a7785990dea88020f20a"

    with connect_to_database() as db:
        cursor = db.cursor()

        def fill(table_string, table_name):
            """Insert every row of *table_string* into *table_name*."""
            rows = [
                tuple(field.strip() for field in row_string.split('|'))
                for row_string in table_string.split('\n')
            ]
            # Bind values through placeholders instead of pasting repr()
            # output into the SQL text; only the (hard-coded) table name is
            # formatted in, because identifiers cannot be bound.
            placeholders = ','.join('?' * len(rows[0]))
            cursor.executemany(
                'INSERT OR IGNORE INTO {} VALUES ({})'.format(
                    table_name, placeholders),
                rows)
            db.commit()

        fill(authors, 'Авторы')
        fill(books, 'Книги')
        fill(users, 'Пользователи')
コード例 #5
0
ファイル: vstup2017.py プロジェクト: ogroleg/ispyt
def start_parsing_pages_and_write_result_to_database(input_arguments):
    """Read the saved pages and process them with a pool of worker processes.

    The files under ``input_arguments.path`` are queued and loaded into a
    shared cache, then ``input_arguments.workers`` processes each run
    ``main_worker`` on that cache until it is drained.

    Args:
        input_arguments: object exposing ``erase``, ``db_host``, ``db``,
            ``path`` and ``workers`` attributes.  When ``erase`` is truthy
            the ``requests`` collection is dropped before processing.
    """
    if input_arguments.erase:
        db = connect_to_database(input_arguments.db_host, input_arguments.db)
        db.requests.drop()

    files_to_read_queue, file_cache, is_all_files_read = create_queues()
    add_files_to_queue(input_arguments.path, files_to_read_queue)
    read_data_from_files(files_to_read_queue, file_cache, is_all_files_read)
    print(f'Total files to process: {files_to_read_queue.qsize()}')
    print()

    # Every worker receives the same argument tuple.
    worker_args = [(file_cache, is_all_files_read, input_arguments)] * \
        input_arguments.workers
    # starmap blocks until all workers have returned; the context manager
    # then shuts the pool down so its processes are not leaked.
    with multiprocessing.Pool(input_arguments.workers) as pool:
        pool.starmap(main_worker, worker_args)
    print('exit')
コード例 #6
0
ファイル: vstup2017.py プロジェクト: ogroleg/ispyt
def main_worker(files_cache, is_all_files_read, input_arguments):
    """Consume cached pages, parse them and save the results in batches.

    Keeps pulling ``(filename, file_string)`` pairs from *files_cache*
    until *is_all_files_read* is set and the cache is empty.  Each page is
    handed to ``process_page_with_admission_requests``; non-empty results
    are logged, accumulated, and flushed with ``save_results_to_db``
    whenever more than ``NUM_RESULTS_TO_SAVE`` entries are pending.
    Anything still pending when the loop ends is saved as well.

    Args:
        files_cache: queue-like object yielding ``(filename, file_string)``.
        is_all_files_read: event-like object exposing ``is_set()``.
        input_arguments: object exposing ``db_host`` and ``db`` attributes.
    """
    db = connect_to_database(input_arguments.db_host, input_arguments.db)
    pending = []

    while True:
        # Stop only once the reader is finished AND nothing is left queued.
        if is_all_files_read.is_set() and files_cache.empty():
            break

        try:
            filename, file_string = files_cache.get(timeout=FILE_CACHE_DELAY)
        except queue.Empty:
            # Nothing arrived within the timeout; re-check the exit condition.
            continue

        page_results = process_page_with_admission_requests(
            filename, file_string)
        if page_results is None or len(page_results) == 0:
            continue

        for entry in page_results:
            logger.info(entry)
        pending.extend(page_results)

        if len(pending) > NUM_RESULTS_TO_SAVE:
            save_results_to_db(db, pending)
            # Rebind to a fresh list rather than clearing the saved one.
            pending = []

    # Flush whatever did not reach the batch threshold.
    if pending:
        save_results_to_db(db, pending)