Ejemplo n.º 1
0
def compare_homonyms_spec_note():
    """Sort BS homonyms with a special note by whether BG has a match.

    A comparison key (name, idf, info, special note) is built for every line
    of ``src_dict/Омонимы БГ.txt``; the special note is re-parsed as a word
    form after its ``'< '`` marker is removed.  Each line of
    ``src_dict/О-мы БС спец. прим. не совпадают с Повторами.txt`` gets a key
    of the same shape (its note with ``'.* < '`` removed) and is written to
    one of two files under ``out/`` depending on whether that key occurs
    among the BG keys.
    """
    homonyms_bg = get_string_list_from_file('src_dict/Омонимы БГ.txt')

    # A set, because the keys are only used for membership tests below.
    bg_keys = set()

    for bg_line in homonyms_bg:
        socket_form = get_socket_word_form(bg_line)

        # The special note is parsed as a word-form line of its own.
        note_form = get_socket_word_form(
            socket_form.spec_note.replace('< ', ''))
        spec_note = ' '.join(filter(None, [
            note_form.invisible,
            note_form.name,
            note_form.root_index,
            note_form.idf,
            ' '.join(note_form.info),
            note_form.note,
        ]))

        bg_keys.add(' '.join(filter(None, [
            socket_form.name,
            socket_form.idf,
            ' '.join(socket_form.info),
            spec_note,
        ])))

    homonyms_spec_note = get_string_list_from_file(
        'src_dict/О-мы БС спец. прим. не совпадают с Повторами.txt')

    relevant = []
    not_relevant = []

    for homonym in homonyms_spec_note:
        title_form = get_bs_title_word_form(homonym)
        print(title_form.name, title_form.note)  # progress output
        key = ' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
            title_form.note.replace('.* < ', ''),
        ]))
        if key in bg_keys:
            relevant.append(homonym)
        else:
            not_relevant.append(homonym)

    save_list_to_file(relevant,
                      'out/О-мы БС спец. прим. совпадают с О-мами БГ.txt')
    save_list_to_file(not_relevant,
                      'out/О-мы БС спец. прим. не совпадают с О-мами БГ.txt')
Ejemplo n.º 2
0
def get_homonymous_multi_rooted():
    """Save BG word forms whose name coincides with a multi-rooted word.

    The names of all non-empty cells of ``src_dict/Многокорневые слова.csv``
    are collected first.  Then every visible word form without a root index
    in ``src_dict/БГ 10.04.21.txt`` whose name is among them is recorded:
    as itself when it equals its sub-group title form, otherwise as
    ``'<word form> < <title form>'``.  The result is sorted ignoring ``'*'``,
    surrounding whitespace and case, and written to
    ``out/Слова, омонимичные многокорневым словам.txt``.
    """
    multi_root_words = get_dicts_from_csv_file(
        'src_dict/Многокорневые слова.csv')

    # A set, because the names are only used for membership tests below.
    multi_root_names = set()
    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word):
            cell = multi_root_word[root_index_key]
            if cell:
                multi_root_names.add(get_socket_word_form(cell).name)

    socket_group_list = list(read_src_socket_bs('src_dict/БГ 10.04.21.txt'))

    homonymous_multi_rooted = []

    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            title_text = str(sub_group.title_word_form)
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible or word_form.root_index:
                    continue
                if word_form.name not in multi_root_names:
                    continue
                print(word_form)  # progress output
                form_text = str(word_form)
                if form_text == title_text:
                    homonymous_multi_rooted.append(form_text)
                else:
                    homonymous_multi_rooted.append(
                        ' < '.join([form_text, title_text]))

    homonymous_multi_rooted = sorted(
        homonymous_multi_rooted,
        key=lambda x: x.replace('*', '').strip().lower())
    save_list_to_file(homonymous_multi_rooted,
                      'out/Слова, омонимичные многокорневым словам.txt')
Ejemplo n.º 3
0
def get_capital_letter_bg():
    """Save capitalised BG words together with their lower-case homonyms.

    For every line of ``out/Большая буква. БГ.txt`` the visible word forms
    of ``src_dict/БГ 13.03.21.txt`` whose name (with ``'*'`` removed) equals
    the lower-cased name of that line are looked up.  When at least one is
    found, the line, the matching forms and an empty separator line are
    appended to the output, which is written to
    ``out/Большая буква. БГ. Омонимы.txt`` without the trailing separator.
    """
    capital_letter_bg = list(
        get_string_list_from_file('out/Большая буква. БГ.txt'))
    socket_group_list = list(read_src_socket_bs('src_dict/БГ 13.03.21.txt'))

    # Index the dictionary once instead of rescanning it for every word.
    # Dictionary order is kept inside each bucket.
    forms_by_name = {}
    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            for word_form in sub_group.socket_word_forms:
                if not word_form.invisible:
                    form_name = word_form.name.replace('*', '')
                    forms_by_name.setdefault(form_name, []).append(word_form)

    capital_letter_bg_homonyms = []

    for capital_word in capital_letter_bg:
        capital_word_name = get_socket_word_form(capital_word).name
        matches = forms_by_name.get(capital_word_name.lower())
        if not matches:
            continue

        capital_letter_bg_homonyms.append(capital_word)
        for word_form in matches:
            print(word_form)  # progress output
            capital_letter_bg_homonyms.append(str(word_form))
        capital_letter_bg_homonyms.append('')

    # [:-1] drops the separator that follows the last entry.
    save_list_to_file(capital_letter_bg_homonyms[:-1],
                      'out/Большая буква. БГ. Омонимы.txt')
Ejemplo n.º 4
0
def get_bg_abbreviation_homonyms():
    """Save BG abbreviations together with their lower-case homonyms.

    The distinct lines of ``out/Аббревиатура. БГ.txt`` are processed in
    sorted order.  For each one the visible word forms of
    ``src_dict/БГ 13.03.21.txt`` whose name (with ``'*'`` removed) equals the
    lower-cased abbreviation name are looked up.  When at least one is found,
    the abbreviation, the matching forms and an empty separator line are
    appended to the output, which is written to
    ``out/Аббревиатура. БГ. Омонимы.txt`` without the trailing separator.
    """
    abbreviation_bg = get_string_list_from_file('out/Аббревиатура. БГ.txt')
    abbreviation_bg = sorted(set(abbreviation_bg))

    socket_group_list = list(read_src_socket_bs('src_dict/БГ 13.03.21.txt'))

    # Index the dictionary once instead of rescanning it for every
    # abbreviation.  Dictionary order is kept inside each bucket.
    forms_by_name = {}
    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            for word_form in sub_group.socket_word_forms:
                if not word_form.invisible:
                    form_name = word_form.name.replace('*', '')
                    forms_by_name.setdefault(form_name, []).append(word_form)

    bg_abbreviation_homonyms = []

    for abbreviation in abbreviation_bg:
        form_name = get_socket_word_form(abbreviation).name.lower()
        matches = forms_by_name.get(form_name)
        if not matches:
            continue

        bg_abbreviation_homonyms.append(abbreviation)
        for word_form in matches:
            print(form_name)  # progress output
            bg_abbreviation_homonyms.append(str(word_form))
        bg_abbreviation_homonyms.append('')

    # [:-1] drops the separator that follows the last entry.
    save_list_to_file(bg_abbreviation_homonyms[:-1],
                      'out/Аббревиатура. БГ. Омонимы.txt')
Ejemplo n.º 5
0
def find_all_multi_rooted_words_from_bs():
    """
    15. Find in the document БС 06.04.21.txt all words (group title words
    and stand-alone words) from the document Многокорневые слова.xlsx
    and create a list of lines with such words - the document
    Многокорневые слова БС.txt .
    Taking into account items 1 and 2 of the BG/BS correspondence rules,
    compare each line of the document Многокорневые слова БС.txt
    with each word cell of the document Многокорневые слова.xlsx .

    The multi-rooted words are read from ``out/Многокорневые слова.csv``;
    the first column of every row is skipped.  A BS title form whose note
    contains ``'<'`` is compared without its note.
    """
    multi_root_words = get_dicts_from_csv_file(
        'out/Многокорневые слова.csv')

    word_forms_bases = list(read_src_bs('src_dict/БС 06.04.21.txt'))

    # A set, because the keys are only used for membership tests below.
    multi_root_bg_forms = set()

    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            multi_root_bg_forms.add(' '.join(filter(None, [
                socket_form.name,
                socket_form.idf,
                ' '.join(socket_form.info),
                socket_form.note.replace('* ', ''),
            ])))

    multi_root_bs_forms = []

    for group_word_form in word_forms_bases:
        title_form = group_word_form.title_word_form
        if '<' in title_form.note:
            note = None  # notes containing '<' are left out of the key
        else:
            note = title_form.note.replace('.* ', '')
        src_title_form = ' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
            note,
        ]))
        if src_title_form in multi_root_bg_forms:
            print(title_form)  # progress output
            multi_root_bs_forms.append(str(title_form))

    multi_root_bs_forms = sorted(
        multi_root_bs_forms,
        key=lambda x: x.replace('*', '').lower().strip()
    )

    save_list_to_file(multi_root_bs_forms, 'out/Многокорневые слова БС.txt')
Ejemplo n.º 6
0
def get_remaining_repetitions():
    """Save the in-group repetitions that match no BS homonym.

    Comparison keys are collected from two BS homonym files: name, idf and
    info for ``src_dict/О-мы БС спец. прим. совпадают с Повторами.txt``, and
    additionally the note (without ``'.*'``) for
    ``src_dict/О-мы БС совпадают с Повторами.txt``.  Every line of
    ``out/Повторы в группах (без повторов).txt`` is reduced to name, idf,
    info and note (without ``'*'``); the keys absent from the homonym keys
    are written to ``out/Повторы ост.txt`` in input order.
    """
    # A set, because the keys are only used for membership tests below.
    homonyms = set()

    for line in get_string_list_from_file(
            'src_dict/О-мы БС спец. прим. совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(' '.join(filter(None, [
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
        ])))

    for line in get_string_list_from_file(
            'src_dict/О-мы БС совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(' '.join(filter(None, [
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.*', '').strip(),
        ])))

    remaining_repetitions = []

    for line in get_string_list_from_file(
            'out/Повторы в группах (без повторов).txt'):
        # Parse each line once rather than once per field.
        socket_form = get_socket_word_form(line)
        replay = ' '.join(filter(None, [
            socket_form.name,
            socket_form.idf,
            ' '.join(socket_form.info),
            socket_form.note.replace('*', '').strip(),
        ]))
        if replay not in homonyms:
            remaining_repetitions.append(replay)

    save_list_to_file(remaining_repetitions, 'out/Повторы ост.txt')
Ejemplo n.º 7
0
def get_multirooted_homonyms():
    """Save homonyms whose name has both multi-rooted and single-root forms.

    The lines of ``out/Омонимы БГ.txt`` are grouped by word-form name.  For
    every name that has at least one form with a root index and at least one
    form without it, all forms of that name are recorded (forms with a root
    index first).  The result is sorted case-insensitively and written to
    ``out/Омонимы - многокорневые и немногокорневые.txt``.
    """
    homonyms = list(get_string_list_from_file('out/Омонимы БГ.txt'))

    # Parse every line once and group the forms by name.
    forms_by_name = {}
    for homonym in homonyms:
        socket_form = get_socket_word_form(homonym)
        forms_by_name.setdefault(socket_form.name, []).append(socket_form)

    multirooted_homonyms = []

    for name in sorted(forms_by_name, key=lambda x: x.lower()):
        socket_forms = forms_by_name[name]

        # Both buckets are filled for the whole name before they are
        # compared.  Resetting them for each form, as was done previously,
        # meant that they were never both non-empty.
        multirooted = [form for form in socket_forms if form.root_index]
        single_root = [form for form in socket_forms if not form.root_index]

        if multirooted and single_root:
            multirooted_homonyms.extend(str(item) for item in multirooted)
            multirooted_homonyms.extend(str(item) for item in single_root)

    multirooted_homonyms = sorted(multirooted_homonyms,
                                  key=lambda x: x.lower())
    save_list_to_file(multirooted_homonyms,
                      'out/Омонимы - многокорневые и немногокорневые.txt')
Ejemplo n.º 8
0
def check_socket_bs():
    """Print the multi-rooted words whose name is missing from the BS list.

    The names of the title forms in ``out/Многокорневые слова БС.txt`` are
    collected, then every non-empty cell of
    ``out/Многокорневые слова.csv`` (the first column of each row is
    skipped) is parsed and printed when its name is not among them.
    """
    multi_root_words = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    multi_root_bs_forms = get_string_list_from_file(
        'out/Многокорневые слова БС.txt')
    # A set, because the names are only used for membership tests below.
    bs_names = {get_bs_title_word_form(x).name for x in multi_root_bs_forms}

    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            if socket_form.name not in bs_names:
                print(socket_form)
Ejemplo n.º 9
0
def find_all_multi_rooted_words_from_bs():
    """Save the BS title forms that correspond to multi-rooted words.

    A comparison key (name, idf, info, note without ``'* '``) is built for
    every non-empty cell of ``src_dict/Многокорневые слова.csv``; the first
    column of each row is skipped.  Every title form of
    ``src_dict/БС 03.02.21.txt`` whose own key (note without ``'.* '``)
    occurs among them is recorded.  The result is sorted ignoring ``'*'``,
    case and surrounding whitespace, and written to
    ``out/Многокорневые слова БС.txt``.
    """
    multi_root_words = get_dicts_from_csv_file(
        'src_dict/Многокорневые слова.csv')

    word_forms_bases = list(read_src_bs('src_dict/БС 03.02.21.txt'))

    # A set, because the keys are only used for membership tests below.
    multi_root_bg_forms = set()

    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            multi_root_bg_forms.add(' '.join(filter(None, [
                socket_form.name,
                socket_form.idf,
                ' '.join(socket_form.info),
                socket_form.note.replace('* ', ''),
            ])))

    multi_root_bs_forms = []

    for group_word_form in word_forms_bases:
        title_form = group_word_form.title_word_form
        src_title_form = ' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
            title_form.note.replace('.* ', ''),
        ]))
        if src_title_form in multi_root_bg_forms:
            print(title_form)  # progress output
            multi_root_bs_forms.append(str(title_form))

    multi_root_bs_forms = sorted(
        multi_root_bs_forms,
        key=lambda x: x.replace('*', '').lower().strip()
    )

    save_list_to_file(multi_root_bs_forms, 'out/Многокорневые слова БС.txt')
Ejemplo n.º 10
0
def compare_homonyms():
    """Sort BS homonyms by whether they match a BG homonym.

    A comparison key (name, idf, info, note) is built for every line of
    ``src_dict/Омонимы БГ без этим. примечаний.txt``.  Each line of
    ``src_dict/О-мы БС не совпадают с Повторами.txt`` gets a key of the same
    shape, with ``'.* '`` in its note replaced by ``'* '``, and is written
    to one of two files under ``out/`` depending on whether that key occurs
    among the BG keys.
    """
    homonyms_bg = get_string_list_from_file(
        'src_dict/Омонимы БГ без этим. примечаний.txt')

    # A set, because the keys are only used for membership tests below.
    bg_keys = set()

    for bg_line in homonyms_bg:
        socket_form = get_socket_word_form(bg_line)
        bg_keys.add(' '.join(filter(None, [
            socket_form.name,
            socket_form.idf,
            ' '.join(socket_form.info),
            socket_form.note,
        ])))

    homonyms_bs = get_string_list_from_file(
        'src_dict/О-мы БС не совпадают с Повторами.txt')

    homonyms_bs_relevant = []
    homonyms_bs_not_relevant = []

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        key = ' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
            title_form.note.replace('.* ', '* '),
        ]))
        if key in bg_keys:
            homonyms_bs_relevant.append(homonym)
        else:
            homonyms_bs_not_relevant.append(homonym)

    save_list_to_file(homonyms_bs_relevant,
                      'out/О-мы БС совпадают с О-мами БГ.txt')
    save_list_to_file(homonyms_bs_not_relevant,
                      'out/О-мы БС не совпадают с О-мами БГ.txt')
Ejemplo n.º 11
0
def compare_replays_in_groups():
    """Sort in-group repetitions by whether a BS homonym matches them.

    A comparison key (name, idf, info, note without ``'.*'``) is built for
    every line of ``src_dict/Омонимы БС.txt``.  The file
    ``src_dict/Повторы в группах.txt`` is split into blank-line separated
    groups; the first line of each group and the lines starting with ``'!'``
    are skipped.  Every remaining line is written to
    ``out/19.1 Совпадающие.txt`` or ``out/19.1 Не совпадающие.txt``
    depending on whether its key (note without ``'*'``) occurs among the BS
    keys.  Both outputs are sorted ignoring ``'*'``, surrounding whitespace
    and case.
    """
    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    # A set, because the keys are only used for membership tests below.
    bs_keys = set()

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        bs_keys.add(' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
            title_form.note.replace('.*', '').strip(),
        ])))

    relevant = []
    not_relevant = []

    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]

    for group in groups:
        # [1:] skips the first line of the group.
        for line in group.split('\n')[1:]:
            if line.startswith('!'):
                continue
            word_form = get_socket_word_form(line)
            line_key = ' '.join(filter(None, [
                word_form.name,
                word_form.idf,
                ' '.join(word_form.info),
                word_form.note.replace('*', '').strip(),
            ]))
            if line_key in bs_keys:
                relevant.append(line)
            else:
                not_relevant.append(line)

    def sort_key(text):
        return text.replace('*', '').strip().lower()

    save_list_to_file(sorted(relevant, key=sort_key),
                      'out/19.1 Совпадающие.txt')

    save_list_to_file(sorted(not_relevant, key=sort_key),
                      'out/19.1 Не совпадающие.txt')
Ejemplo n.º 12
0
def check_socket_bs():
    """Print and save the multi-rooted words missing from the BS list.

    The names of the title forms in ``out/Многокорневые слова БС.txt`` are
    collected, then every non-empty cell of
    ``out/Многокорневые слова.csv`` (the first column of each row is
    skipped) is parsed.  Forms whose name is not among the BS names are
    printed and written to ``out/Не найденные в БС.txt``.
    """
    multi_root_words = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    multi_root_bs_forms = get_string_list_from_file(
        'out/Многокорневые слова БС.txt')
    # A set, because the names are only used for membership tests below.
    bs_names = {get_bs_title_word_form(x).name for x in multi_root_bs_forms}

    not_found_in_bs = []

    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            if socket_form.name not in bs_names:
                print(socket_form)
                not_found_in_bs.append(str(socket_form))

    save_list_to_file(not_found_in_bs, 'out/Не найденные в БС.txt')
Ejemplo n.º 13
0
def parsing_replays_in_groups():
    """Split in-group repetitions into repeating and unique lines.

    ``out/Повторы в группах.txt`` is split into blank-line separated groups;
    the first line of each group and the lines starting with ``'!'`` are
    skipped.  Every remaining line is parsed and reduced to a key of name,
    idf and info.  Forms whose key occurs more than once go to
    ``out/Повторы в группах. Повторяющиеся строки.txt``, the others to
    ``out/Повторы в группах. Уникальные строки.txt``.  Both outputs are
    sorted ignoring ``'*'``, surrounding whitespace and case.
    """
    word_forms = []

    with open('out/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]

    for group in groups:
        # [1:] skips the first line of the group.
        for line in group.split('\n')[1:]:
            if not line.startswith('!'):
                word_forms.append(get_socket_word_form(line))

    clear_lines = [
        ' '.join(filter(None, [x.name, x.idf, ' '.join(x.info)]))
        for x in word_forms
    ]

    # Count every key in one pass instead of calling list.count() per item.
    occurrences = {}
    for clear_line in clear_lines:
        occurrences[clear_line] = occurrences.get(clear_line, 0) + 1

    repeating_lines = []
    unique_lines = []

    for form, clear_line in zip(word_forms, clear_lines):
        if occurrences[clear_line] > 1:
            repeating_lines.append(str(form))
        else:
            unique_lines.append(str(form))

    def sort_key(text):
        return text.replace('*', '').strip().lower()

    save_list_to_file(
        sorted(repeating_lines, key=sort_key),
        'out/Повторы в группах. Повторяющиеся строки.txt')

    save_list_to_file(
        sorted(unique_lines, key=sort_key),
        'out/Повторы в группах. Уникальные строки.txt')
Ejemplo n.º 14
0
def check_unique_strings():
    """Check repeating and unique lines against the BS title forms.

    A key of name, idf and info is built for every title form of
    ``src_dict/БС 20.02.21.txt``.  The lines of the "repeating" and of the
    "unique" source files are then each split in two: lines whose key occurs
    exactly once among the BS title keys (file names ending in
    "П.4 Правил соблюдается") and all other lines
    ("П.4 Правил не соблюдается").
    """
    word_forms_bases = read_src_bs('src_dict/БС 20.02.21.txt')

    # Number of BS title forms per key, counted once up front so the loops
    # below do not call list.count() for every line.
    title_counts = {}
    for base in word_forms_bases:
        title_form = base.title_word_form
        title_key = ' '.join(filter(None, [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
        ]))
        title_counts[title_key] = title_counts.get(title_key, 0) + 1

    def split_by_title_count(lines):
        # Return (lines matching exactly one BS title, remaining lines).
        respected = []
        not_respected = []
        for line in lines:
            socket_form = get_socket_word_form(line)
            str_form = ' '.join(filter(None, [
                socket_form.name,
                socket_form.idf,
                ' '.join(socket_form.info),
            ]))
            if title_counts.get(str_form, 0) == 1:
                respected.append(line)
            else:
                not_respected.append(line)
        return respected, not_respected

    # Repeating lines
    r_lines_resp, r_lines_not_resp = split_by_title_count(
        get_string_list_from_file(
            'src_dict/Повторы в группах. Повторяющиеся строки.txt'))

    save_list_to_file(
        r_lines_resp,
        'out/Повторы в группах. Повторяющиеся строки. П.4 Правил соблюдается.txt'
    )

    save_list_to_file(
        r_lines_not_resp,
        'out/Повторы в группах. Повторяющиеся строки. П.4 Правил не соблюдается.txt'
    )

    # Unique lines
    u_lines_resp, u_lines_not_resp = split_by_title_count(
        get_string_list_from_file(
            'src_dict/Повторы в группах. Уникальные строки.txt'))

    save_list_to_file(
        u_lines_resp,
        'out/Повторы в группах. Уникальные строки. П.4 Правил соблюдается.txt')

    save_list_to_file(
        u_lines_not_resp,
        'out/Повторы в группах. Уникальные строки. П.4 Правил не соблюдается.txt'
    )
Ejemplo n.º 15
0
def compare_dicts():
    """Split the ordinary BG and BS words into matching and unique lines.

    Every line of ``out/Обычные слова БГ.txt`` and
    ``out/Обычные слова БС.txt`` is reduced to a comparison key of name,
    idf, info and note; the first two characters of a BG note and the first
    three characters of a BS note are dropped.  BG lines whose key also
    occurs among the BS keys go to ``out/Строки совпадают.txt``, the other
    BG lines to ``out/Уникальные строки БГ.txt``, and BS lines whose key
    does not occur among the BG keys to ``out/Уникальные строки БС.txt``.
    """
    ordinary_words_bg_list = list(
        get_string_list_from_file('out/Обычные слова БГ.txt'))
    ordinary_words_bs_list = list(
        get_string_list_from_file('out/Обычные слова БС.txt'))

    def compare_key(form, note_offset):
        # Key used to match a BG form against a BS form.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note[note_offset:],
        ]))

    # Each line is parsed once; the keys stay aligned with the lines.
    bg_compare_forms = [
        compare_key(get_socket_word_form(x), 2)
        for x in ordinary_words_bg_list
    ]
    bs_compare_forms = [
        compare_key(get_bs_title_word_form(x), 3)
        for x in ordinary_words_bs_list
    ]

    intersection = set(bg_compare_forms) & set(bs_compare_forms)

    matching_lines = []
    bg_unique = []
    bs_unique = []

    for bg_string, compare_form in zip(ordinary_words_bg_list,
                                       bg_compare_forms):
        if compare_form in intersection:
            matching_lines.append(bg_string)
        else:
            bg_unique.append(bg_string)

    for bs_string, compare_form in zip(ordinary_words_bs_list,
                                       bs_compare_forms):
        if compare_form not in intersection:
            # Previously the (empty) bs_unique list itself was appended to
            # matching_lines here, so bs_unique was never filled.
            bs_unique.append(bs_string)

    save_list_to_file(matching_lines, 'out/Строки совпадают.txt')
    save_list_to_file(bg_unique, 'out/Уникальные строки БГ.txt')
    save_list_to_file(bs_unique, 'out/Уникальные строки БС.txt')
Ejemplo n.º 16
0
def ordinary_words_bg():
    """Save the BG word forms that belong to no special category.

    An exclusion set is assembled from four sources: the non-empty cells of
    ``src_dict/Многокорневые слова.csv``, the word lines of
    ``src_dict/Повторы в группах.txt`` (the first line of each group and
    the lines starting with ``'!'`` are skipped), and the re-assembled
    string forms of the lines of ``src_dict/Омонимы БГ.txt`` and
    ``src_dict/Слова, омонимичные многокорневым словам.txt``.  Every
    visible word form of ``src_dict/БГ 01.03.21.txt`` whose ``str()`` is not
    excluded is kept; the kept forms are sorted and written to
    ``out/Обычные слова БГ.txt``.
    """
    socket_group_list = list(read_src_socket_bs(
        'src_dict/БГ 01.03.21.txt'))

    # A set, because the entries are only used for membership tests below.
    exclusion_set = set()

    def full_string_form(line):
        # Re-assemble a parsed line from all of its non-empty parts.
        socket_form = get_socket_word_form(line)
        return ' '.join(filter(None, [
            socket_form.invisible,
            socket_form.name,
            socket_form.root_index,
            socket_form.idf,
            ' '.join(socket_form.info),
            socket_form.note,
            socket_form.etml_note,
        ]))

    # Multi-rooted words (the raw cell text is excluded as is)
    multi_root_words = get_dicts_from_csv_file(
        'src_dict/Многокорневые слова.csv')
    for multi_root_word in multi_root_words:
        for root_index_key in list(multi_root_word):
            if multi_root_word[root_index_key]:
                exclusion_set.add(multi_root_word[root_index_key])

    # Repetitions in groups
    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]
    for group in groups:
        for line in group.split('\n')[1:]:
            if not line.startswith('!'):
                exclusion_set.add(line)

    # BG homonyms
    for homonym in get_string_list_from_file('src_dict/Омонимы БГ.txt'):
        exclusion_set.add(full_string_form(homonym))

    # Words homonymous to multi-rooted words
    for word in get_string_list_from_file(
            'src_dict/Слова, омонимичные многокорневым словам.txt'):
        exclusion_set.add(full_string_form(word))

    # Ordinary BG words
    ordinary_words_bg_list = []

    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible:
                    continue
                if str(word_form) not in exclusion_set:
                    ordinary_words_bg_list.append(word_form)

    # The word forms themselves are sorted, then converted to text.
    ordinary_words_bg_list = [str(x) for x in sorted(ordinary_words_bg_list)]

    save_list_to_file(ordinary_words_bg_list, 'out/Обычные слова БГ.txt')
Ejemplo n.º 17
0
def get_diff_lists():
    """Split the ordinary BS and BG words into matching and unique forms.

    Every line of ``out/Обычные слова БС.txt`` and
    ``out/Обычные слова БГ.txt`` is reduced to a comparison key of name,
    idf, info and note (BS: note without ``'.*'``; BG: note without its
    first two characters).  The keys are then parsed again as word forms.
    BS forms whose key is in both sets are saved to
    ``out/Строки совпадают.txt``, BS-only forms to
    ``out/Уникальные строки БС.txt`` and BG-only forms to
    ``out/Уникальные строки БГ.txt``.
    """
    def bs_key(form):
        # Comparison key of a BS title form.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note.replace('.*', '').strip(),
        ]))

    def bg_key(form):
        # Comparison key of a BG socket form.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note[2:],
        ]))

    ordinary_words_bs_list = [
        bs_key(get_bs_title_word_form(x))
        for x in get_string_list_from_file('out/Обычные слова БС.txt')
    ]

    ordinary_words_bg_list = [
        bg_key(get_socket_word_form(x))
        for x in get_string_list_from_file('out/Обычные слова БГ.txt')
    ]

    # Kept as sets, because they are only used for membership tests below.
    bs_keys = set(ordinary_words_bs_list)
    bg_keys = set(ordinary_words_bg_list)
    matching_src = bs_keys & bg_keys
    unique_src_bs = bs_keys - bg_keys
    unique_src_bg = bg_keys - bs_keys

    matching_forms = []
    unique_bs_forms = []
    unique_bg_forms = []

    # NOTE(review): the loops below parse the comparison keys, not the
    # original file lines, so the saved forms come from the normalised text.
    # This looks unintended - confirm and, if so, iterate over the original
    # lines instead.
    for word in ordinary_words_bs_list:
        title_form = get_bs_title_word_form(word)
        str_form = bs_key(title_form)
        if str_form in matching_src:
            matching_forms.append(title_form)
        elif str_form in unique_src_bs:
            unique_bs_forms.append(title_form)

    for word in ordinary_words_bg_list:
        socket_form = get_socket_word_form(word)
        if bg_key(socket_form) in unique_src_bg:
            unique_bg_forms.append(socket_form)

    save_list_to_file(matching_forms, 'out/Строки совпадают.txt')
    save_list_to_file(unique_bs_forms, 'out/Уникальные строки БС.txt')
    save_list_to_file(unique_bg_forms, 'out/Уникальные строки БГ.txt')
Ejemplo n.º 18
0
def get_homonyms_bg():
    """Save the BG homonyms and the words homonymous to in-group repeats.

    Names are compared with ``'*'`` and surrounding whitespace removed.  A
    visible word form without a root index in ``src_dict/БГ 10.04.21.txt``
    is a candidate when its name occurs more than once among such forms in
    the whole dictionary but exactly once among the visible forms of its own
    group.  A candidate is recorded as itself when it equals its sub-group
    title form, otherwise as ``'<word form> < <title form>'``.

    Candidates whose parsed name is shared by more than one candidate are
    written to ``out/Омонимы БГ.txt``, the rest to
    ``out/Слова, омонимичные повторам в группе.txt``.  Both outputs are
    sorted ignoring ``'*'``, surrounding whitespace and case.
    """
    socket_group_list = list(read_src_socket_bs('src_dict/БГ 10.04.21.txt'))

    name_counts = Counter(
        word_form.name.replace('*', '').strip()
        for socket_group in socket_group_list
        for sub_group in socket_group.sub_groups
        for word_form in sub_group.socket_word_forms
        if not word_form.invisible and not word_form.root_index
    )
    # A set, because the names are only used for membership tests below.
    socket_names = {x for x, y in name_counts.items() if y > 1}

    all_homonyms = []

    for socket_group in socket_group_list:
        # Counted once per group instead of list.count() per word form.
        group_name_counts = Counter(
            x.name.replace('*', '').strip()
            for x in socket_group.socket_word_forms if not x.invisible
        )

        for sub_group in socket_group.sub_groups:
            title_text = str(sub_group.title_word_form)
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible or word_form.root_index:
                    continue
                raw_name = word_form.name.replace('*', '').strip()
                if group_name_counts[raw_name] != 1:
                    continue
                if raw_name not in socket_names:
                    continue
                form_text = str(word_form)
                if form_text == title_text:
                    all_homonyms.append(form_text)
                else:
                    all_homonyms.append(' < '.join([form_text, title_text]))

    # Parse each candidate once and count how often its name occurs.
    homonym_names = [get_socket_word_form(x).name for x in all_homonyms]
    homonym_name_counts = Counter(homonym_names)

    homonyms = []
    homonymous_repetitions = []

    for homonym, name in zip(all_homonyms, homonym_names):
        if homonym_name_counts[name] > 1:
            homonyms.append(homonym)
        else:
            homonymous_repetitions.append(homonym)

    homonyms = sorted(homonyms,
                      key=lambda x: x.replace('*', '').strip().lower())
    homonymous_repetitions = sorted(
        homonymous_repetitions,
        key=lambda x: x.replace('*', '').strip().lower())

    save_list_to_file(homonyms, 'out/Омонимы БГ.txt')
    save_list_to_file(homonymous_repetitions,
                      'out/Слова, омонимичные повторам в группе.txt')
Ejemplo n.º 19
0
def get_homonyms_bs():
    """Sort BS homonyms by whether they match an in-group repetition.

    ``src_dict/Повторы в группах.txt`` is split into blank-line separated
    groups whose first line is skipped.  A line starting with ``'!'`` sets
    the current sub-group title; every other line yields two comparison
    keys: name, idf, info plus the current sub-group title name, and name,
    idf, info plus the line's note without ``'*'``.

    Each line of ``src_dict/Омонимы БС.txt`` whose note starts with
    ``'.* <'`` is checked against the first kind of key, all other lines
    against the second kind.  The lines are written to four files under
    ``out/`` according to the kind of note and whether a match was found.

    The sub-group title is reset at the start of every group, so a word line
    that precedes the first ``'!'`` line of its group is keyed without a
    title name instead of raising ``UnboundLocalError`` or reusing the title
    of the previous group.
    """
    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    # Sets, because the keys are only used for membership tests below.
    replays_in_groups_spec_note = set()  # (".* <")
    replays_in_groups = set()  # (".*")

    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]

    for group in groups:
        title_name_sub_group = None
        for line in group.split('\n')[1:]:
            if line.startswith('!'):
                title_name_sub_group = get_socket_word_form(
                    line.replace('!', '').strip()).name
                continue

            word_form = get_socket_word_form(line)
            common_parts = [
                word_form.name,
                word_form.idf,
                ' '.join(word_form.info),
            ]
            replays_in_groups_spec_note.add(' '.join(
                filter(None, common_parts + [title_name_sub_group])))
            replays_in_groups.add(' '.join(
                filter(None, common_parts + [
                    word_form.note.replace('*', '').strip(),
                ])))

    homonyms_spec_note_relevant = []
    homonyms_spec_note_not_relevant = []
    homonyms_relevant = []
    homonyms_not_relevant = []

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        common_parts = [
            title_form.name,
            title_form.idf,
            ' '.join(title_form.info),
        ]

        # with a special note (".* <")
        if title_form.note.startswith('.* <'):
            string_form = ' '.join(filter(None, common_parts + [
                title_form.note.replace('.* <', '').strip(),
            ]))
            if string_form in replays_in_groups_spec_note:
                homonyms_spec_note_relevant.append(homonym)
            else:
                homonyms_spec_note_not_relevant.append(homonym)

        # WITHOUT a special note (".* <")
        else:
            string_form = ' '.join(filter(None, common_parts + [
                title_form.note.replace('.*', '').strip(),
            ]))
            if string_form in replays_in_groups:
                homonyms_relevant.append(homonym)
            else:
                homonyms_not_relevant.append(homonym)

    save_list_to_file(homonyms_spec_note_relevant,
                      'out/О-мы БС спец. прим. совпадают с Повторами.txt')
    save_list_to_file(homonyms_spec_note_not_relevant,
                      'out/О-мы БС спец. прим. не совпадают с Повторами.txt')
    save_list_to_file(homonyms_relevant,
                      'out/О-мы БС совпадают с Повторами.txt')
    save_list_to_file(homonyms_not_relevant,
                      'out/О-мы БС не совпадают с Повторами.txt')