예제 #1
0
def add_groups_to_bs():
    """Merge loner words and two noun-group files into the BS file.

    Task 2 (continued): keeping the headwords of "БС 15.01.21.txt" in
    alphabetical order, add the loners from
    "Добавить одиночки в БС 2.txt" to it.

    Task 4: keeping the headwords in alphabetical order, add the groups
    from "Добавить группы в БС. Сущ-ные изм.txt" and
    "Добавить группы в БС. Сущ-ные 2.txt" to the BS.

    The merged collection is sorted and written to 'out/БС 15.01.21.txt'.
    """
    merged = list(read_src_bs('src_dict/БС 15.01.21.txt'))

    # Each loner string becomes a group with no word forms; the remaining
    # TitleWordForm constructor arguments are left empty.
    merged.extend(
        GroupWordForm(TitleWordForm(loner, '', [], ''), [])
        for loner in get_string_list_from_file(
            'src_dict/Добавить одиночки в БС 2.txt')
    )

    merged.extend(
        read_src_bs('src_dict/Добавить группы в БС. Сущ-ные изм.txt'))
    merged.extend(
        read_src_bs('src_dict/Добавить группы в БС. Сущ-ные 2.txt',
                    encoding='utf-8'))

    merged.sort()
    save_bs_dicts_to_txt(merged, 'out/БС 15.01.21.txt')
예제 #2
0
def add_spec_info_to_bg():
    """Copy special info from new noun headwords onto matching BG words.

    Task 5: for every group headword from
    "Добавить группы в БС. Сущ-ные изм.txt" and
    "Добавить группы в БС. Сущ-ные 2.txt":
    5.1. copy ALL special information of that headword,
    5.2. find the same word in "БГ 08.01.21.txt", and
    5.3. add the copied special information to the word found.

    Names are compared with every '*' removed and surrounding whitespace
    stripped.  Each matching BG word has its ``idf`` and ``info``
    overwritten with the headword's values and its name printed.  The
    updated BG is written to 'out/БГ 23.01.21.txt'.
    """
    socket_group_word_form_list = list(
        read_src_socket_bs('src_dict/БГ 08.01.21.txt'))

    # Index the BG words by normalized name once, so that every headword is
    # a dictionary lookup instead of a scan over the whole BG.  Only ``idf``
    # and ``info`` are changed below, so the name keys stay valid.
    socket_forms_by_name = {}
    for socket_group_word_form in socket_group_word_form_list:
        for socket_word_form in socket_group_word_form.socket_word_forms:
            s_socket_name = socket_word_form.name.replace('*', '').strip()
            socket_forms_by_name.setdefault(
                s_socket_name, []).append(socket_word_form)

    group_word_form_list = list(read_src_bs(
        'src_dict/Добавить группы в БС. Сущ-ные изм.txt'))
    group_word_form_list += read_src_bs(
        'src_dict/Добавить группы в БС. Сущ-ные 2.txt', encoding='utf-8')

    for group_word_form in group_word_form_list:
        title_word_form = group_word_form.title_word_form
        idf = title_word_form.idf
        info = title_word_form.info
        s_title_name = title_word_form.name.replace('*', '').strip()
        # Matches are visited in BG order, the same order a full scan gives.
        for socket_word_form in socket_forms_by_name.get(s_title_name, ()):
            print(socket_word_form.name)
            socket_word_form.idf = idf
            socket_word_form.info = info

    save_socket_bs_dicts_to_txt(socket_group_word_form_list,
                                'out/БГ 23.01.21.txt')
예제 #3
0
def add_groups_to_bs():
    """Append the generated noun groups to the BS and save it sorted.

    Reads 'src_dict/БС 04.03.21.txt', appends the groups read from
    'out/Добавить группы в БС. Сущ-ные.txt' and writes the sorted result
    to 'out/БС 04.03.21.txt'.
    """
    merged = list(read_src_bs('src_dict/БС 04.03.21.txt'))
    merged.extend(
        read_src_bs('out/Добавить группы в БС. Сущ-ные.txt', encoding='utf-8'))
    merged.sort()
    save_bs_dicts_to_txt(merged, 'out/БС 04.03.21.txt')
예제 #4
0
def add_groups_to_bs():
    """Add the modified verb groups to the BS.

    Task 11: keeping the headwords in alphabetical order, add the groups
    from "Добавить группы в БС. Глаголы изм.txt" to "БС 31.01.txt".
    The sorted result is written to 'out/БС 03.02.21.txt'.
    """
    # The base file is read in full before the verb file is opened.
    combined = [
        *read_src_bs('src_dict/БС 31.01.txt'),
        *read_src_bs('src_dict/Добавить группы в БС. Глаголы изм.txt',
                     encoding='utf-8'),
    ]
    combined.sort()
    save_bs_dicts_to_txt(combined, 'out/БС 03.02.21.txt')
예제 #5
0
def add_groups_to_bs():
    """Add the adjective groups to the BS.

    Task 7: keeping the headwords in alphabetical order, add the groups
    from "Добавить группы в БС. Прил-ные.txt" to "БС 24.01.21.txt".
    The sorted result is written to 'out/БС 27.01.21.txt'.
    """
    base_groups = list(read_src_bs('src_dict/БС 24.01.21.txt'))
    adjective_groups = list(read_src_bs(
        'src_dict/Добавить группы в БС. Прил-ные.txt', encoding='utf-8'))
    save_bs_dicts_to_txt(sorted(base_groups + adjective_groups),
                         'out/БС 27.01.21.txt')
예제 #6
0
def add_spec_info_to_bg():
    """Copy special info from the modified verb headwords onto BG words.

    Task 12: for every group headword from
    "Добавить группы в БС. Глаголы изм.txt":
    12.1. copy ALL special information of that headword,
    12.2. find the same word in "БГ 30.01.21.txt", and
    12.3. add the copied special information to the word found.

    Names are compared with every '*' removed and surrounding whitespace
    stripped.  Each matching BG word has its ``idf`` and ``info``
    overwritten with the headword's values and its name printed.  The
    updated BG is written to 'out/БГ 03.02.21.txt'.
    """
    socket_group_word_form_list = list(
        read_src_socket_bs('src_dict/БГ 30.01.21.txt'))

    # Index the BG words by normalized name once, so that every verb is a
    # dictionary lookup instead of a scan over the whole BG.  Only ``idf``
    # and ``info`` are changed below, so the name keys stay valid.
    socket_forms_by_name = {}
    for socket_group_word_form in socket_group_word_form_list:
        for socket_word_form in socket_group_word_form.socket_word_forms:
            s_socket_name = socket_word_form.name.replace('*', '').strip()
            socket_forms_by_name.setdefault(
                s_socket_name, []).append(socket_word_form)

    verbs = list(read_src_bs(
        'src_dict/Добавить группы в БС. Глаголы изм.txt', encoding='utf-8'))

    for verb in verbs:
        title_word_form = verb.title_word_form
        idf = title_word_form.idf
        info = title_word_form.info
        s_title_name = title_word_form.name.replace('*', '').strip()
        # Matches are visited in BG order, the same order a full scan gives.
        for socket_word_form in socket_forms_by_name.get(s_title_name, ()):
            print(socket_word_form.name)
            socket_word_form.idf = idf
            socket_word_form.info = info

    save_socket_bs_dicts_to_txt(socket_group_word_form_list,
                                'out/БГ 03.02.21.txt')
예제 #7
0
def get_not_included_in_the_lists():
    """Report headwords that are absent from the lists for their identifiers.

    For every group in 'src_dict/БС 20.03.21.txt' and every identifier in
    the headword's ``info`` that is a key of ``IDFS``, the headword string
    is looked up in the list files named by ``IDFS[idf]`` (files found in
    'src_dict/lst', keyed by file stem).  When none of those lists contains
    it, the headword is printed and appended to
    'out/lst/<stems joined with ", "> ещё.txt'.

    Each (output file, headword) pair is reported at most once.

    Raises:
        KeyError: if a stem named in ``IDFS`` has no file in 'src_dict/lst'
            and is reached during the lookup.
    """
    word_forms_bases = read_src_bs('src_dict/БС 20.03.21.txt')
    file_stems = {
        x.stem: set(get_string_list_from_file(f'{x}'))
        for x in Path('src_dict/lst').glob('*')
    }

    # Pairs already written.  This must live outside the loops: a guard
    # list created right before its own check can never suppress anything.
    reported = set()

    for group in word_forms_bases:
        title_form = group.title_word_form
        title_string = str(title_form)
        for idf in title_form.info or ():
            if idf not in IDFS:
                continue
            stems = IDFS[idf]
            out_stem = ', '.join(stems)
            if any(title_string in file_stems[f_stem] for f_stem in stems):
                continue
            if (out_stem, title_string) in reported:
                continue
            print(title_form)
            add_string_to_file(title_string, f'out/lst/{out_stem} ещё.txt')
            reported.add((out_stem, title_string))
예제 #8
0
def get_capital_letter_bs():
    """Collect BS headwords whose name equals a capitalised word lower-cased.

    For every line of 'out/Большая буква. БС.txt' the headword name is
    lower-cased and compared with the name (asterisks removed) of every
    headword of 'src_dict/БС 13.03.21.txt'.  When at least one headword
    matches, the capitalised line, the matching headword strings and an
    empty separator line are added to the output.  The trailing separator
    is dropped before saving to 'out/Большая буква. БС. Омонимы.txt'.
    """
    capital_letter_bs = list(
        get_string_list_from_file('out/Большая буква. БС.txt'))

    # Parse every BS headword once and group the headword strings by name,
    # rather than re-parsing the whole BS for each capitalised word.
    title_forms_by_name = {}
    for group in read_src_bs('src_dict/БС 13.03.21.txt'):
        title_form = str(group.title_word_form)
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        title_forms_by_name.setdefault(form_name, []).append(title_form)

    capital_letter_bs_homonyms = []

    for capital_word in capital_letter_bs:
        capital_word_name = get_bs_title_word_form(capital_word).name
        title_form_list = title_forms_by_name.get(
            capital_word_name.lower(), [])
        if not title_form_list:
            continue
        for title_form in title_form_list:
            print(title_form)
        capital_letter_bs_homonyms.append(capital_word)
        capital_letter_bs_homonyms += title_form_list
        capital_letter_bs_homonyms.append('')

    # [:-1] removes the separator that follows the last block.
    save_list_to_file(capital_letter_bs_homonyms[:-1],
                      'out/Большая буква. БС. Омонимы.txt')
예제 #9
0
def get_bs_abbreviation_homonyms():
    """Collect BS headwords whose name equals an abbreviation lower-cased.

    For every line of 'out/Аббревиатура. БС.txt' the headword name is
    lower-cased and compared with the name (asterisks removed) of every
    headword of 'src_dict/БС 13.03.21.txt'.  When at least one headword
    matches, the abbreviation line, the matching headword strings and an
    empty separator line are added to the output.  The trailing separator
    is dropped before saving to 'out/Аббревиатура. БС. Омонимы.txt'.
    """
    abbreviation_bs = get_string_list_from_file('out/Аббревиатура. БС.txt')

    # Parse every BS headword once and group the headword strings by name,
    # rather than re-parsing the whole BS for each abbreviation.
    title_forms_by_name = {}
    for group in read_src_bs('src_dict/БС 13.03.21.txt'):
        title_form = str(group.title_word_form)
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        title_forms_by_name.setdefault(form_name, []).append(title_form)

    bs_abbreviation_homonyms = []

    for abbreviation in abbreviation_bs:
        abbreviation_name = get_bs_title_word_form(abbreviation).name
        title_form_list = title_forms_by_name.get(
            abbreviation_name.lower(), [])
        if not title_form_list:
            continue
        for title_form in title_form_list:
            print(title_form)
        bs_abbreviation_homonyms.append(abbreviation)
        bs_abbreviation_homonyms += title_form_list
        bs_abbreviation_homonyms.append('')

    # [:-1] removes the separator that follows the last block.
    save_list_to_file(bs_abbreviation_homonyms[:-1],
                      'out/Аббревиатура. БС. Омонимы.txt')
예제 #10
0
def get_headwords():
    """Find headwords that also occur as a word form of another group.

    Every headword of 'src_dict/БС 09.03.21.txt' is printed.  Its name
    (asterisks removed) is then looked for among the word-form names of
    every group whose headword string differs from it.  For each such
    group the output receives: the headword string, the group's headword
    string, every word form of the group with that name, and an empty
    separator line.  The result is saved to 'out/ЗС-повторы.txt'.
    """
    word_forms_bases = list(read_src_bs('src_dict/БС 09.03.21.txt'))

    # Map each word-form name to the groups containing it (in file order,
    # each group once), so a headword does not rescan every group.
    groups_by_form_name = {}
    for group in word_forms_bases:
        if not group.word_forms:
            continue
        for form_name in dict.fromkeys(x.name for x in group.word_forms):
            groups_by_form_name.setdefault(form_name, []).append(group)

    headwords_reruns = []

    for headword in [x.title_word_form for x in word_forms_bases]:
        print(headword)
        headword_string = str(headword)
        headword_name = headword.name.replace('*', '')
        for group in groups_by_form_name.get(headword_name, ()):
            title_form = group.title_word_form
            # A group with an identical headword string is not a rerun.
            if headword_string == str(title_form):
                continue
            headwords_reruns.append(headword_string)
            headwords_reruns.append(str(title_form))
            headwords_reruns.extend(
                str(word_form) for word_form in group.word_forms
                if word_form.name == headword_name)
            headwords_reruns.append('')

    save_list_to_file(headwords_reruns, 'out/ЗС-повторы.txt')
예제 #11
0
def add_spec_info_to_bg():
    """Copy special info from the adjective headwords onto BG words.

    Task 8: for every group headword from
    "Добавить группы в БС. Прил-ные.txt":
    8.1. copy ALL special information of that headword,
    8.2. find the same word in "БГ 24.01.21.txt", and
    8.3. add the copied special information to the word found.

    Names are compared with every '*' removed and surrounding whitespace
    stripped.  Each matching BG word has its ``idf`` and ``info``
    overwritten with the headword's values and its name printed.  The
    updated BG is written to 'out/БГ 27.01.21.txt'.
    """
    socket_group_word_form_list = list(
        read_src_socket_bs('src_dict/БГ 24.01.21.txt'))

    # Index the BG words by normalized name once, so that every adjective
    # is a dictionary lookup instead of a scan over the whole BG.  Only
    # ``idf`` and ``info`` are changed below, so the name keys stay valid.
    socket_forms_by_name = {}
    for socket_group_word_form in socket_group_word_form_list:
        for socket_word_form in socket_group_word_form.socket_word_forms:
            s_socket_name = socket_word_form.name.replace('*', '').strip()
            socket_forms_by_name.setdefault(
                s_socket_name, []).append(socket_word_form)

    adjectives = list(
        read_src_bs('src_dict/Добавить группы в БС. Прил-ные.txt',
                    encoding='utf-8'))

    for adj in adjectives:
        title_word_form = adj.title_word_form
        idf = title_word_form.idf
        info = title_word_form.info
        s_title_name = title_word_form.name.replace('*', '').strip()
        # Matches are visited in BG order, the same order a full scan gives.
        for socket_word_form in socket_forms_by_name.get(s_title_name, ()):
            print(socket_word_form.name)
            socket_word_form.idf = idf
            socket_word_form.info = info

    save_socket_bs_dicts_to_txt(socket_group_word_form_list,
                                'out/БГ 27.01.21.txt')
예제 #12
0
def get_two_in_one():
    """Split the remaining repetitions by whether they match a BS headword.

    Each line of 'src_dict/Повторы ост.txt' is compared with the headwords
    of 'src_dict/БС 27.02.21.txt', rendered as the non-empty fields
    ``name``, ``idf``, space-joined ``info`` and ``note`` joined by single
    spaces.  Matching lines go to 'out/Повторы ост. совпадает с БС.txt',
    the rest to 'out/Повторы ост. не совпадает с БС.txt'; input order is
    kept in both files.
    """
    remaining_repetitions = get_string_list_from_file('src_dict/Повторы ост.txt')
    word_forms_bases = read_src_bs('src_dict/БС 27.02.21.txt')

    # A set keeps the membership test constant-time per input line.
    bs_word_names = {
        ' '.join(filter(None, [
            x.name,
            x.idf,
            ' '.join(x.info),
            x.note
        ]))
        for x in (group.title_word_form for group in word_forms_bases)
    }

    relevant = []
    not_relevant = []

    for repeat in remaining_repetitions:
        target = relevant if repeat in bs_word_names else not_relevant
        target.append(repeat)

    save_list_to_file(relevant, 'out/Повторы ост. совпадает с БС.txt')
    save_list_to_file(not_relevant, 'out/Повторы ост. не совпадает с БС.txt')
예제 #13
0
def ordinary_words_bs():
    """Save the BS headwords that appear in none of the special lists.

    A headword string of 'src_dict/БС 02.03.21.txt' is "ordinary" when it
    is not listed in the multi-root words file, the homonyms file, or the
    "remaining repetitions matching the BS" file.  Lines of the last file
    are passed through ``get_bs_title_word_form`` and ``str`` before being
    compared.  The ordinary headwords are written, in BS order, to
    'out/Обычные слова БС.txt'.
    """
    word_forms_bases = read_src_bs('src_dict/БС 02.03.21.txt')
    bs_word_forms = [str(x.title_word_form) for x in word_forms_bases]

    # A set keeps the per-headword exclusion test constant-time.
    exclusions = set()

    # Multi-root words of the BS
    exclusions.update(
        get_string_list_from_file('src_dict/Многокорневые слова БС.txt'))

    # Homonyms of the BS
    exclusions.update(get_string_list_from_file('src_dict/Омонимы БС.txt'))

    # Remaining repetitions that match the BS
    exclusions.update(
        str(get_bs_title_word_form(x))
        for x in get_string_list_from_file(
            'src_dict/Повторы ост. совпадает с БС.txt'))

    # Ordinary words of the BS
    ordinary_words_bs_list = [
        bs_str_form for bs_str_form in bs_word_forms
        if bs_str_form not in exclusions
    ]

    save_list_to_file(ordinary_words_bs_list, 'out/Обычные слова БС.txt')
예제 #14
0
def alphabetical_order():
    """Sort the groups of Кунсткамера2.txt by headword.

    Original task: all that remains is to arrange the groups in the
    document Кунсткамера2.txt in the alphabetical order of their
    headwords, hence the document Кунсткамера2.txt is sent along.

    Reads 'src_dict/Кунсткамера2.txt' and writes the sorted groups to
    'out/Кунсткамера2.txt', both as UTF-8.
    """
    groups = list(read_src_bs('src_dict/Кунсткамера2.txt', encoding='utf-8'))
    groups.sort()
    save_bs_dicts_to_txt(groups, 'out/Кунсткамера2.txt', encoding='utf-8')
예제 #15
0
def find_all_multi_rooted_words_from_bs():
    """List the BS headwords that occur among the multi-root words.

    Task 15: find in "БС 06.04.21.txt" all words (group headwords and
    loners) from "Многокорневые слова.xlsx" and create a list of lines
    with such words, "Многокорневые слова БС.txt".  Taking items 1 and 2
    of the BG/BS correspondence rules into account, compare each line of
    "Многокорневые слова БС.txt" with each word cell of
    "Многокорневые слова.xlsx".

    Both sides are compared as the non-empty fields name, idf,
    space-joined info and note joined by single spaces.  On the BG side
    '* ' is removed from the note; on the BS side '.* ' is removed, and a
    note containing '<' is left out of the key entirely.  Matching
    headwords are printed and saved, sorted case-insensitively with '*'
    ignored, to 'out/Многокорневые слова БС.txt'.
    """

    multi_root_words = get_dicts_from_csv_file(
        'out/Многокорневые слова.csv')

    word_forms_bases = list(read_src_bs('src_dict/БС 06.04.21.txt'))

    # A set: every BS headword is tested against it below.
    multi_root_bg_forms = set()

    for multi_root_word in multi_root_words:
        # The first column of each row is skipped.
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            multi_root_bg_forms.add(
                ' '.join(filter(
                    None,
                    [
                        socket_form.name,
                        socket_form.idf,
                        ' '.join(socket_form.info),
                        socket_form.note.replace('* ', ''),
                    ])))

    multi_root_bs_forms = []

    for group_word_form in word_forms_bases:
        title_form = group_word_form.title_word_form
        src_title_form = ' '.join(filter(
            None,
            [
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                (title_form.note.replace('.* ', '')
                 if '<' not in title_form.note else None),
            ]))
        if src_title_form in multi_root_bg_forms:
            print(title_form)
            multi_root_bs_forms.append(str(title_form))

    multi_root_bs_forms.sort(
        key=lambda x: x.replace('*', '').lower().strip())

    save_list_to_file(multi_root_bs_forms, 'out/Многокорневые слова БС.txt')
예제 #16
0
def save_kunstkamera_2():
    """Move the listed groups out of the BS into Кунсткамера2.

    Groups of 'src_dict/БС 29.11.20.txt' whose headword key appears in
    'src_dict/В Кунсткамеру2.txt' are written, sorted, to
    'out/Кунсткамера2.txt' (UTF-8).  Listed lines that matched no group are
    written to 'out/Не выполняется п.1.txt'.  The remaining groups plus
    the result of ``get_singles_bases()`` are written, sorted, to
    'out/БС 12.12.20.txt'.

    The headword key is the non-empty fields name, idf, space-joined info
    and note joined by single spaces, then stripped.
    """

    def title_key(group):
        # Single definition of the key used for every comparison below.
        title_form = group.title_word_form
        return ' '.join(
            filter(None, [
                title_form.name, title_form.idf, ' '.join(title_form.info),
                title_form.note
            ])).strip()

    to_the_kunstkamera_list = list(
        get_string_list_from_file('src_dict/В Кунсткамеру2.txt'))
    wanted = set(to_the_kunstkamera_list)

    kunstkamera_2 = []
    kunstkamera_2_title_string = set()
    remaining_word_forms_bases = []

    # One pass over the BS splits it into moved and remaining groups.
    for group in read_src_bs('src_dict/БС 29.11.20.txt'):
        title_string = title_key(group)
        if title_string in wanted:
            kunstkamera_2.append(group)
            kunstkamera_2_title_string.add(title_string)
        else:
            remaining_word_forms_bases.append(group)

    save_bs_dicts_to_txt(sorted(kunstkamera_2),
                         'out/Кунсткамера2.txt',
                         encoding='utf-8')

    remaining_to_the_kunstkamera = [
        x for x in to_the_kunstkamera_list
        if x not in kunstkamera_2_title_string
    ]
    save_list_to_file(remaining_to_the_kunstkamera,
                      'out/Не выполняется п.1.txt')

    remaining_word_forms_bases += get_singles_bases()
    save_bs_dicts_to_txt(sorted(remaining_word_forms_bases),
                         'out/БС 12.12.20.txt')
예제 #17
0
def get_spec_note():
    """Save headwords whose special note has no space after its opening.

    A headword of 'src_dict/БС 23.02.21.txt' is selected when its note
    starts with '.* <' and the note from index 5 onwards contains no
    space.  The selected headword strings are written to
    'out/Спец. прим. БС. 1 слово.txt'.
    """
    groups = list(read_src_bs('src_dict/БС 23.02.21.txt'))
    heads = [group.title_word_form for group in groups]

    selected = []
    for head in heads:
        note = head.note
        if not note:
            continue
        if not note.startswith('.* <'):
            continue
        if ' ' in note[5:]:
            continue
        selected.append(str(head))

    save_list_to_file(selected, 'out/Спец. прим. БС. 1 слово.txt')
예제 #18
0
def get_adjusted_participles_bs():
    """Save the BS headwords whose name starts with an asterisk.

    Each such headword of 'src_dict/БС 28.01.21.txt' is printed and its
    string form is written to 'out/Адъектированные причастия БС.txt'.
    """
    starred = []

    for group in read_src_bs('src_dict/БС 28.01.21.txt'):
        head = group.title_word_form
        if not head.name.startswith('*'):
            continue
        print(head)
        starred.append(str(head))

    save_list_to_file(starred, 'out/Адъектированные причастия БС.txt')
예제 #19
0
def get_bs_names():
    """Save the distinct word-form names of the BS, sorted ignoring case.

    Names are taken from the word forms of every group of
    'src_dict/БС 09.03.21.txt' that has word forms, with every '*'
    removed.  Duplicates are dropped and the result is written to
    'out/bs_names.txt'.
    """
    word_forms_bases = list(read_src_bs('src_dict/БС 09.03.21.txt'))

    bs_names = {
        word_form.name.replace('*', '')
        for group in word_forms_bases
        if group.word_forms
        for word_form in group.word_forms
    }

    # Set iteration order is not stable between runs, so names that differ
    # only in case need an explicit tie-breaker for reproducible output.
    bs_names = sorted(bs_names, key=lambda name: (name.lower(), name))

    save_list_to_file(bs_names, 'out/bs_names.txt')
예제 #20
0
def get_no_full_form():
    """Save headwords matching the 'no full form' pattern.

    A headword of 'src_dict/БС 24.03.21.txt' is selected when its ``info``
    is non-empty, its ``idf`` starts with '.П' and the first ``info`` item
    starts with 'К', 'С' or 'П'.  Selected headwords are printed and their
    strings written to 'out/НЕТ полной формы.txt'.
    """
    selected = []

    for group in read_src_bs('src_dict/БС 24.03.21.txt'):
        head = group.title_word_form
        identifiers = head.info
        if not identifiers or not head.idf.startswith('.П'):
            continue
        if not identifiers[0].startswith(('К', 'С', 'П')):
            continue
        print(head)
        selected.append(str(head))

    save_list_to_file(selected, 'out/НЕТ полной формы.txt')
예제 #21
0
def change_template_bs():
    """Replace the 'мнI2' identifier with 'мнI2**' for '-ий' headwords.

    Applies to headwords of 'src_dict/БС 15.05.21.txt' whose ``idf`` is
    '.СеИ' and whose name ends with 'ий', except 'вий', 'змий' and 'кий'.
    The sorted result is written to 'out/БС 17.05.21.txt'.
    """
    groups = list(read_src_bs('src_dict/БС 15.05.21.txt'))

    for group in groups:
        head = group.title_word_form
        # All three conditions are evaluated before being combined.
        has_idf = head.idf == '.СеИ'
        has_ending = head.name.endswith('ий')
        is_regular = head.name not in ('вий', 'змий', 'кий')
        if not (has_idf and has_ending and is_regular):
            continue

        identifiers = head.info
        for position, identifier in enumerate(identifiers):
            if identifier == 'мнI2':
                identifiers[position] = 'мнI2**'

    groups.sort()
    save_bs_dicts_to_txt(groups, 'out/БС 17.05.21.txt')
예제 #22
0
def check_verbs():
    """Split the Г64/Г65 verbs by the stem of their '.ГНБ2е' word form.

    The verbs of the two source lists are merged and sorted.  Each verb is
    looked up among the headword strings of 'src_dict/БС 27.03.21.txt'
    (first matching group wins; verbs with no group are skipped) and
    printed.  The first word form with ``idf == '.ГНБ2е'`` is classified:

    * no such form, or its name does not end with one of ``ENDINGS``:
      the verb goes to 'out/ГНБ2е отс. или оконч. другое.txt';
    * otherwise the last 3 characters are cut off (5 when the name ends
      with 'ся'); if the last remaining character is in ``LETTERS`` the
      verb goes to 'out/Г64&Г65Б.txt', else to 'out/Г64&Г65А.txt'.

    The merged verb list itself is written to 'out/Г64&Г65.txt'.
    """
    verbs = sorted(
        list(
            get_string_list_from_file(
                'src_dict/Г64. -АТЬ(СЯ), -ЕТЬ(СЯ) II спр. сов. в.txt')) + list(
                    get_string_list_from_file(
                        'src_dict/Г65. -ИТЬ(СЯ) II спр. сов. в.txt')))

    # First group per headword string, so each verb is a single lookup
    # rather than a scan that stringifies every headword again.
    group_by_title = {}
    for group in read_src_bs('src_dict/БС 27.03.21.txt'):
        group_by_title.setdefault(str(group.title_word_form), group)

    unclassified = []
    g64_g65_a = []
    g64_g65_b = []

    for verb in verbs:
        group = group_by_title.get(verb)
        if group is None:
            continue
        print(verb)

        gnb2e = next(
            (word_form for word_form in group.word_forms
             if word_form.idf == '.ГНБ2е'),
            None)

        # Form missing, or its ending is not one of the expected ones.
        if not (gnb2e and gnb2e.name.endswith(ENDINGS)):
            unclassified.append(verb)
            continue

        cut = 5 if gnb2e.name.endswith('ся') else 3
        prefix = gnb2e.name[:-cut]

        if prefix[-1] in LETTERS:
            g64_g65_b.append(verb)
        else:
            g64_g65_a.append(verb)

    save_list_to_file(verbs, 'out/Г64&Г65.txt')
    save_list_to_file(unclassified, 'out/ГНБ2е отс. или оконч. другое.txt')
    save_list_to_file(g64_g65_a, 'out/Г64&Г65А.txt')
    save_list_to_file(g64_g65_b, 'out/Г64&Г65Б.txt')
예제 #23
0
def change_bs_abbreviation():
    """Lower-case the groups listed as abbreviations or capitalised words.

    For every group of 'src_dict/БС 15.03.21.txt' whose headword string is
    listed in 'src_dict/Аббревиатура. БС.txt' or
    'src_dict/Большая буква. БС.txt', the headword name and the names of
    all its word forms are lower-cased.  The sorted result is written to
    'out/БС 16.03.21.txt'.
    """
    # Both lists select the same action, so one set serves the lookup.
    titles_to_lower = set(
        get_string_list_from_file('src_dict/Аббревиатура. БС.txt'))
    titles_to_lower.update(
        get_string_list_from_file('src_dict/Большая буква. БС.txt'))

    word_forms_bases = list(read_src_bs('src_dict/БС 15.03.21.txt'))

    for group in word_forms_bases:
        title_form = group.title_word_form
        if str(title_form) not in titles_to_lower:
            continue
        title_form.name = title_form.name.lower()
        for word_form in group.word_forms:
            word_form.name = word_form.name.lower()

    save_bs_dicts_to_txt(sorted(word_forms_bases), 'out/БС 16.03.21.txt')
예제 #24
0
def find_all_multi_rooted_words_from_bs():
    """List the BS headwords that occur among the multi-root words.

    Every non-empty cell after the first column of
    'src_dict/Многокорневые слова.csv' is parsed with
    ``get_socket_word_form``.  Headwords of 'src_dict/БС 03.02.21.txt' are
    compared with those cells; both sides are rendered as the non-empty
    fields name, idf, space-joined info and note joined by single spaces,
    with '* ' removed from the note on the BG side and '.* ' on the BS
    side.  Matching headwords are printed and saved, sorted
    case-insensitively with '*' ignored, to
    'out/Многокорневые слова БС.txt'.
    """
    multi_root_words = get_dicts_from_csv_file(
        'src_dict/Многокорневые слова.csv')

    word_forms_bases = list(read_src_bs('src_dict/БС 03.02.21.txt'))

    # A set: every BS headword is tested against it below.
    multi_root_bg_forms = set()

    for multi_root_word in multi_root_words:
        # The first column of each row is skipped.
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            multi_root_bg_forms.add(
                ' '.join(filter(
                    None,
                    [
                        socket_form.name,
                        socket_form.idf,
                        ' '.join(socket_form.info),
                        socket_form.note.replace('* ', '')
                    ])))

    multi_root_bs_forms = []

    for group_word_form in word_forms_bases:
        title_form = group_word_form.title_word_form
        src_title_form = ' '.join(filter(
            None,
            [
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                title_form.note.replace('.* ', '')
            ]))
        if src_title_form in multi_root_bg_forms:
            print(title_form)
            multi_root_bs_forms.append(str(title_form))

    multi_root_bs_forms.sort(
        key=lambda x: x.replace('*', '').lower().strip())

    save_list_to_file(multi_root_bs_forms, 'out/Многокорневые слова БС.txt')
예제 #25
0
def get_not_included_in_the_lists():
    """Report headwords missing from the list file for their identifier.

    For every group of 'src_dict/БС 24.03.21.txt' and every identifier in
    the headword's ``info`` that is a key of ``IDFS``, the headword string
    is looked up in the list loaded from the 'src_dict/lst' file whose stem
    is ``IDFS[idf]``.  If absent, the headword is printed and appended to
    'out/lst/<stem> ещё.txt'.
    """
    groups = read_src_bs('src_dict/БС 24.03.21.txt')

    listed_by_stem = {}
    for list_path in Path('src_dict/lst').glob('*'):
        listed_by_stem[list_path.stem] = list(
            get_string_list_from_file(f'{list_path}'))

    for group in groups:
        head = group.title_word_form
        identifiers = head.info
        if not identifiers:
            continue
        for idf in identifiers:
            if idf not in IDFS:
                continue
            out_stem = IDFS[idf]
            if str(head) in listed_by_stem[out_stem]:
                continue
            print(head)
            add_string_to_file(str(head), f'out/lst/{out_stem} ещё.txt')
예제 #26
0
def add_groups_to_bs():
    """Build the noun groups file and add those groups to the BS.

    Task 3: using the lists on the "Добавить группы в БС" tab of the
    document Существительные.xlsx, together with the templates on the
    "ШАБЛОНЫ ед.ч." and "ШАБЛОНЫ мн.ч." tabs of the same document, create
    the document "Добавить группы в БС. Сущ-ные.txt".

    The groups built from 'Добавить группы в БС.csv' are written sorted to
    that file (UTF-8), then merged into 'src_dict/БС 13.12.20.txt' and the
    sorted result is written to 'out/БС 14.12.20.txt'.
    """
    new_groups = [
        get_group_word_form(row)
        for row in get_dicts_from_csv_file('Добавить группы в БС.csv')
    ]

    save_bs_dicts_to_txt(sorted(new_groups),
                         'out/Добавить группы в БС. Сущ-ные.txt',
                         encoding='utf-8')

    merged = list(read_src_bs('src_dict/БС 13.12.20.txt'))
    merged.extend(new_groups)
    merged.sort()
    save_bs_dicts_to_txt(merged, 'out/БС 14.12.20.txt')
예제 #27
0
def check_bs_islower():
    """Save headwords whose name and note are not entirely lower-case.

    For every headword of 'out/БС 16.03.21.txt' the non-empty ``name`` and
    ``note`` are joined with a space and the letters 'I' and 'V' are
    removed.  If ``str.islower()`` is false for the result, it is printed
    and the headword string is collected.  The collected strings are
    written to 'out/БС ЕСТЬ большие буквы.txt'.
    """
    groups = list(read_src_bs('out/БС 16.03.21.txt'))

    flagged = []
    for group in groups:
        head = group.title_word_form
        parts = [part for part in (head.name, head.note) if part]
        probe = ' '.join(parts).replace('I', '').replace('V', '')
        if probe.islower():
            continue
        print(probe)
        flagged.append(str(head))

    save_list_to_file(flagged, 'out/БС ЕСТЬ большие буквы.txt')
예제 #28
0
def check_presence():
    """Sort the words of every 'out/lst' file into three result lists.

    For each file in 'out/lst' (read as cp1251) every word is printed and
    classified:

    * 28a: its name is among the headword names of 'src_dict/Омонимы БС.txt'
      — written to 'out/homonyms/<stem> омонимы.txt';
    * 29a: it equals the name of a headword of
      'src_dict/БС 16.03.21 изм.txt' (with '*' removed and whitespace
      stripped) — every such headword is printed and its string written
      to 'out/C/С<stem>.txt';
    * 29b: otherwise — written to 'out/absent/<stem> отсутствует.txt'.

    Each output list is sorted and only written when non-empty.
    """
    homonyms = {get_bs_title_word_form(x).name
                for x in get_string_list_from_file('src_dict/Омонимы БС.txt')}

    word_forms_bases = list(read_src_bs('src_dict/БС 16.03.21 изм.txt'))

    # Headwords grouped by normalized name.  Presence and matching use the
    # same key, so a word found only under a '*'-marked headword is still
    # reported instead of falling into none of the lists.
    titles_by_name = {}
    for group in word_forms_bases:
        title_form = group.title_word_form
        bs_name = title_form.name.replace('*', '').strip()
        titles_by_name.setdefault(bs_name, []).append(title_form)

    for file_path in Path('out/lst').glob('*'):
        file_stem = file_path.stem

        presence = []  # 28a. the word is present in Омонимы БС.txt
        nouns = []  # 29a. the word is found in the BS
        absent = []  # 29b. the word is NOT found in the BS

        words = get_string_list_from_file(file_path, encoding='cp1251')
        for word in words:
            print(word)
            if word in homonyms:
                presence.append(word)
                continue

            matching_titles = titles_by_name.get(word)
            if not matching_titles:
                absent.append(word)
                continue

            for title_form in matching_titles:
                print(title_form)
                nouns.append(str(title_form))

        if presence:
            save_list_to_file(
                sorted(presence), f'out/homonyms/{file_stem} омонимы.txt')

        if nouns:
            save_list_to_file(sorted(nouns), f'out/C/С{file_stem}.txt')

        if absent:
            save_list_to_file(
                sorted(absent), f'out/absent/{file_stem} отсутствует.txt')
예제 #29
0
def get_homonyms_bs():
    """Save the BS headwords whose normalized name occurs more than once.

    Names of the headwords of 'src_dict/БС 06.04.21.txt' are normalized by
    removing every '*' and stripping whitespace.  Every headword whose
    normalized name is shared with another headword is written to
    'out/Омонимы БС.txt', sorted by its string with '*' removed, stripped
    and lower-cased.
    """
    word_forms_bases = list(read_src_bs('src_dict/БС 06.04.21.txt'))

    word_names = [
        x.title_word_form.name.replace('*', '').strip()
        for x in word_forms_bases
    ]
    # Counter keys are already unique; a set gives constant-time lookups.
    repeated_names = {
        name for name, count in Counter(word_names).items() if count > 1
    }

    homonyms = [
        str(group_word_form.title_word_form)
        for group_word_form, name in zip(word_forms_bases, word_names)
        if name in repeated_names
    ]

    save_list_to_file(
        sorted(homonyms, key=lambda x: x.replace('*', '').strip().lower()),
        'out/Омонимы БС.txt')
예제 #30
0
def get_capital_letter_bs():
    """Save BS headwords whose name equals a capitalised word lower-cased.

    For every line of 'src_dict/Большая буква. БС.txt' the headword name
    is lower-cased and compared with the name (asterisks removed) of every
    headword of 'src_dict/БС 09.03.21.txt'.  Matching headword strings are
    printed, collected (once per matching capitalised line) and written,
    sorted by the string with '*' removed, to
    'out/Большая буква. БС. Омонимы.txt'.
    """
    capital_letter_bs = list(
        get_string_list_from_file('src_dict/Большая буква. БС.txt'))

    # Parse every BS headword once and group the headword strings by name,
    # rather than re-parsing the whole BS for each capitalised word.
    title_forms_by_name = {}
    for group in read_src_bs('src_dict/БС 09.03.21.txt'):
        title_form = str(group.title_word_form)
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        title_forms_by_name.setdefault(form_name, []).append(title_form)

    capital_letter_bs_homonyms = []

    for capital_word in capital_letter_bs:
        capital_word_name = get_bs_title_word_form(capital_word).name
        for title_form in title_forms_by_name.get(
                capital_word_name.lower(), ()):
            print(title_form)
            capital_letter_bs_homonyms.append(title_form)

    save_list_to_file(
        sorted(capital_letter_bs_homonyms, key=lambda x: x.replace('*', '')),
        'out/Большая буква. БС. Омонимы.txt')