def compare_homonyms_spec_note():
    """Split BS spec-note homonyms by whether a BG homonym matches them.

    Every line of ``src_dict/Омонимы БГ.txt`` is turned into a comparison
    string made of its name, idf, info and its special note.  The special
    note has ``'< '`` removed, is parsed as a word form of its own and is
    re-joined from that form's fields.

    Every line of
    ``src_dict/О-мы БС спец. прим. не совпадают с Повторами.txt`` is then
    reduced to name, idf, info and note (with ``'.* < '`` removed).  The
    original line goes to one of two files under ``out/`` depending on
    whether its comparison string occurs among the BG ones.  The name and
    note of each BS form are printed while the file is processed.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(part for part in parts if part)

    bg_keys = []
    for bg_line in get_string_list_from_file('src_dict/Омонимы БГ.txt'):
        bg_form = get_socket_word_form(bg_line)
        # Without its '< ' marker the special note is parsed like a line.
        note_form = get_socket_word_form(bg_form.spec_note.replace('< ', ''))
        note_text = compact(
            note_form.invisible,
            note_form.name,
            note_form.root_index,
            note_form.idf,
            ' '.join(note_form.info),
            note_form.note,
        )
        bg_keys.append(compact(
            bg_form.name,
            bg_form.idf,
            ' '.join(bg_form.info),
            note_text,
        ))

    matched = []
    unmatched = []
    bs_lines = get_string_list_from_file(
        'src_dict/О-мы БС спец. прим. не совпадают с Повторами.txt')
    for bs_line in bs_lines:
        bs_form = get_bs_title_word_form(bs_line)
        print(bs_form.name, bs_form.note)
        bs_key = compact(
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.* < ', ''),
        )
        target = matched if bs_key in bg_keys else unmatched
        target.append(bs_line)

    save_list_to_file(
        matched, 'out/О-мы БС спец. прим. совпадают с О-мами БГ.txt')
    save_list_to_file(
        unmatched, 'out/О-мы БС спец. прим. не совпадают с О-мами БГ.txt')
def get_homonymous_multi_rooted():
    """Collect BG word forms that share a name with a multi-root word.

    Names are taken from every non-empty cell of
    ``src_dict/Многокорневые слова.csv``.  The BG source
    ``src_dict/БГ 10.04.21.txt`` is then scanned: each word form that is
    neither invisible nor carries a root index, and whose name is among the
    collected names, is printed and recorded.  A form equal (as text) to
    its sub-group title is recorded alone; otherwise it is recorded as
    ``"<form> < <title>"``.  The records are sorted case-insensitively,
    ignoring ``*`` and surrounding whitespace, and saved to
    ``out/Слова, омонимичные многокорневым словам.txt``.
    """
    csv_rows = get_dicts_from_csv_file('src_dict/Многокорневые слова.csv')
    multi_root_names = [
        get_socket_word_form(row[column]).name
        for row in csv_rows
        for column in list(row)
        if row[column]
    ]

    bg_groups = list(read_src_socket_bs('src_dict/БГ 10.04.21.txt'))
    found = []
    for bg_group in bg_groups:
        for sub_group in bg_group.sub_groups:
            title = sub_group.title_word_form
            for form in sub_group.socket_word_forms:
                # Only visible forms without a root index are candidates.
                if form.invisible or form.root_index:
                    continue
                if form.name not in multi_root_names:
                    continue
                print(form)
                form_text = str(form)
                title_text = str(title)
                if form_text == title_text:
                    found.append(form_text)
                else:
                    found.append(' < '.join([form_text, title_text]))

    found.sort(key=lambda text: text.replace('*', '').strip().lower())
    save_list_to_file(found, 'out/Слова, омонимичные многокорневым словам.txt')
def get_capital_letter_bg():
    """List capitalised BG words together with their lower-case homonyms.

    For every line of ``out/Большая буква. БГ.txt`` the parsed name is
    lower-cased and looked up among the visible word forms of
    ``src_dict/БГ 13.03.21.txt`` (form names compared with ``*`` removed).
    When at least one form matches, the capitalised line, the matching
    forms (as text, in dictionary order) and an empty separator line are
    appended to the result.  Each matching form is printed.  The result,
    minus the trailing separator, is saved to
    ``out/Большая буква. БГ. Омонимы.txt``.
    """
    capital_letter_bg = list(
        get_string_list_from_file('out/Большая буква. БГ.txt'))
    socket_group_list = list(read_src_socket_bs('src_dict/БГ 13.03.21.txt'))

    # Index the visible forms by star-less name once, instead of rescanning
    # the whole dictionary for every capitalised word.  Lists keep the
    # dictionary order, so the output order is unchanged.
    forms_by_name = {}
    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible:
                    continue
                form_name = word_form.name.replace('*', '')
                forms_by_name.setdefault(form_name, []).append(word_form)

    capital_letter_bg_homonyms = []
    for capital_word in capital_letter_bg:
        capital_word_name = get_socket_word_form(capital_word).name
        matches = forms_by_name.get(capital_word_name.lower(), [])
        if not matches:
            continue
        capital_letter_bg_homonyms.append(capital_word)
        for word_form in matches:
            print(word_form)
            capital_letter_bg_homonyms.append(str(word_form))
        # Blank line separates one capitalised word's block from the next.
        capital_letter_bg_homonyms.append('')

    # Drop the separator that follows the last block.
    save_list_to_file(capital_letter_bg_homonyms[:-1],
                      'out/Большая буква. БГ. Омонимы.txt')
def get_bg_abbreviation_homonyms():
    """List BG abbreviations together with their lower-case homonyms.

    The lines of ``out/Аббревиатура. БГ.txt`` are de-duplicated and sorted.
    For each one the parsed name is lower-cased and looked up among the
    visible word forms of ``src_dict/БГ 13.03.21.txt`` (form names compared
    with ``*`` removed).  When at least one form matches, the abbreviation
    line, the matching forms (as text, in dictionary order) and an empty
    separator line are appended to the result.  The star-less name of each
    matching form is printed.  The result, minus the trailing separator, is
    saved to ``out/Аббревиатура. БГ. Омонимы.txt``.
    """
    abbreviation_bg = get_string_list_from_file('out/Аббревиатура. БГ.txt')
    abbreviation_bg = sorted(set(abbreviation_bg))
    socket_group_list = list(read_src_socket_bs('src_dict/БГ 13.03.21.txt'))

    # Index the visible forms by star-less name once, instead of rescanning
    # the whole dictionary for every abbreviation.  Lists keep the
    # dictionary order, so the output order is unchanged.
    forms_by_name = {}
    for socket_group in socket_group_list:
        for sub_group in socket_group.sub_groups:
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible:
                    continue
                form_name = word_form.name.replace('*', '')
                forms_by_name.setdefault(form_name, []).append(word_form)

    bg_abbreviation_homonyms = []
    for abbreviation in abbreviation_bg:
        abbreviation_name = get_socket_word_form(abbreviation).name
        form_name = abbreviation_name.lower()
        matches = forms_by_name.get(form_name, [])
        if not matches:
            continue
        bg_abbreviation_homonyms.append(abbreviation)
        for word_form in matches:
            # Every match has this star-less name, which is what is printed.
            print(form_name)
            bg_abbreviation_homonyms.append(str(word_form))
        # Blank line separates one abbreviation's block from the next.
        bg_abbreviation_homonyms.append('')

    # Drop the separator that follows the last block.
    save_list_to_file(bg_abbreviation_homonyms[:-1],
                      'out/Аббревиатура. БГ. Омонимы.txt')
def find_all_multi_rooted_words_from_bs():
    """Find the BS title words that are listed as multi-root words.

    Task 15: find in the document БС 06.04.21.txt all words (group title
    words and singletons — "ЗС" is presumably "title word") from the
    document Многокорневые слова.xlsx and create a list of lines with such
    words, the document Многокорневые слова БС.txt.  Taking items 1 and 2
    of the BG/BS correlation rules into account, compare each line of
    Многокорневые слова БС.txt with each word cell of
    Многокорневые слова.xlsx.

    What the code does: every non-empty cell after the first column of
    ``out/Многокорневые слова.csv`` is parsed and reduced to name, idf,
    info and note (with ``'* '`` removed).  Each BS title form from
    ``src_dict/БС 06.04.21.txt`` is reduced the same way (note with
    ``'.* '`` removed, and dropped entirely when it contains ``<``).
    Titles whose reduced text occurs among the CSV ones are printed,
    sorted case-insensitively ignoring ``*``, and saved to
    ``out/Многокорневые слова БС.txt``.

    NOTE(review): another function with this same name appears later in
    this source; if both live in one module the later one replaces this
    one — confirm which is intended.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(part for part in parts if part)

    csv_rows = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    bs_groups = list(read_src_bs('src_dict/БС 06.04.21.txt'))

    bg_keys = []
    for row in csv_rows:
        # The first column is skipped; the rest hold the word cells.
        for column in list(row)[1:]:
            cell = row[column]
            if not cell:
                continue
            bg_form = get_socket_word_form(cell)
            bg_keys.append(compact(
                bg_form.name,
                bg_form.idf,
                ' '.join(bg_form.info),
                bg_form.note.replace('* ', ''),
            ))

    found_titles = []
    for bs_group in bs_groups:
        title = bs_group.title_word_form
        if '<' in title.note:
            # Notes containing '<' take no part in the comparison.
            note_part = None
        else:
            note_part = title.note.replace('.* ', '')
        title_key = compact(
            title.name, title.idf, ' '.join(title.info), note_part)
        if title_key not in bg_keys:
            continue
        print(title)
        found_titles.append(str(title))

    found_titles.sort(key=lambda text: text.replace('*', '').lower().strip())
    save_list_to_file(found_titles, 'out/Многокорневые слова БС.txt')
def get_remaining_repetitions():
    """Save the in-group repetitions that match no BS homonym.

    Comparison strings are collected from two BS homonym files:

    * ``src_dict/О-мы БС спец. прим. совпадают с Повторами.txt`` — name,
      idf and info only;
    * ``src_dict/О-мы БС совпадают с Повторами.txt`` — name, idf, info and
      the note with ``'.*'`` removed and whitespace stripped.

    Every line of ``out/Повторы в группах (без повторов).txt`` is reduced
    to name, idf, info and note (``*`` removed, stripped).  The reduced
    strings that occur in neither homonym file are saved, in file order, to
    ``out/Повторы ост.txt``.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(filter(None, parts))

    # A set gives constant-time membership tests for the final filter.
    homonyms = set()
    for line in get_string_list_from_file(
            'src_dict/О-мы БС спец. прим. совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(compact(
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
        ))
    for line in get_string_list_from_file(
            'src_dict/О-мы БС совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(compact(
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.*', '').strip(),
        ))

    remaining_repetitions = []
    for line in get_string_list_from_file(
            'out/Повторы в группах (без повторов).txt'):
        # Parse each line once and reuse the form for all four fields.
        socket_form = get_socket_word_form(line)
        replay = compact(
            socket_form.name,
            socket_form.idf,
            ' '.join(socket_form.info),
            socket_form.note.replace('*', '').strip(),
        )
        if replay not in homonyms:
            remaining_repetitions.append(replay)

    save_list_to_file(remaining_repetitions, 'out/Повторы ост.txt')
def get_multirooted_homonyms():
    """Save homonyms whose name has both multi-root and single-root forms.

    The lines of ``out/Омонимы БГ.txt`` are parsed and grouped by name.  A
    group is kept only when it has at least one form with a root index and
    at least one without.  For kept groups the forms with a root index are
    emitted first, then the others, each as text.  The combined list is
    sorted case-insensitively and saved to
    ``out/Омонимы - многокорневые и немногокорневые.txt``.
    """
    # Parse each line once and group the forms by name, in file order.
    forms_by_name = {}
    for homonym in get_string_list_from_file('out/Омонимы БГ.txt'):
        socket_form = get_socket_word_form(homonym)
        forms_by_name.setdefault(socket_form.name, []).append(socket_form)

    multirooted_homonyms = []
    for name in sorted(forms_by_name, key=lambda x: x.lower()):
        forms = forms_by_name[name]
        # Both lists must be built over the WHOLE group before the test
        # below; resetting them per form would leave one of them empty
        # every time and nothing would ever be reported.
        multirooted = [form for form in forms if form.root_index]
        single_root = [form for form in forms if not form.root_index]
        if multirooted and single_root:
            multirooted_homonyms.extend(str(item) for item in multirooted)
            multirooted_homonyms.extend(str(item) for item in single_root)

    multirooted_homonyms = sorted(multirooted_homonyms,
                                  key=lambda x: x.lower())
    save_list_to_file(multirooted_homonyms,
                      'out/Омонимы - многокорневые и немногокорневые.txt')
def check_socket_bs():
    """Print multi-root words whose name is absent from the BS list.

    Names are parsed from the lines of ``out/Многокорневые слова БС.txt``.
    Every non-empty cell after the first column of
    ``out/Многокорневые слова.csv`` is parsed, and the form is printed when
    its name is not among those names.

    NOTE(review): another function with this same name appears later in
    this source; if both live in one module the later one replaces this
    one — confirm which is intended.
    """
    csv_rows = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    known_bs_names = [
        get_bs_title_word_form(line).name
        for line in get_string_list_from_file(
            'out/Многокорневые слова БС.txt')
    ]

    for row in csv_rows:
        # The first column is skipped; the rest hold the word cells.
        for column in list(row)[1:]:
            cell = row[column]
            if not cell:
                continue
            candidate = get_socket_word_form(cell)
            if candidate.name in known_bs_names:
                continue
            print(candidate)
def find_all_multi_rooted_words_from_bs():
    """Find the BS title words that are listed as multi-root words.

    Every non-empty cell after the first column of
    ``src_dict/Многокорневые слова.csv`` is parsed and reduced to name,
    idf, info and note (with ``'* '`` removed).  Each BS title form from
    ``src_dict/БС 03.02.21.txt`` is reduced the same way (note with
    ``'.* '`` removed).  Titles whose reduced text occurs among the CSV
    ones are printed, sorted case-insensitively ignoring ``*``, and saved
    to ``out/Многокорневые слова БС.txt``.

    NOTE(review): an earlier function with this same name appears in this
    source; if both live in one module this later one replaces it —
    confirm which is intended.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(part for part in parts if part)

    csv_rows = get_dicts_from_csv_file('src_dict/Многокорневые слова.csv')
    bs_groups = list(read_src_bs('src_dict/БС 03.02.21.txt'))

    bg_keys = []
    for row in csv_rows:
        # The first column is skipped; the rest hold the word cells.
        for column in list(row)[1:]:
            cell = row[column]
            if not cell:
                continue
            bg_form = get_socket_word_form(cell)
            bg_keys.append(compact(
                bg_form.name,
                bg_form.idf,
                ' '.join(bg_form.info),
                bg_form.note.replace('* ', ''),
            ))

    found_titles = []
    for bs_group in bs_groups:
        title = bs_group.title_word_form
        title_key = compact(
            title.name,
            title.idf,
            ' '.join(title.info),
            title.note.replace('.* ', ''),
        )
        if title_key not in bg_keys:
            continue
        print(title)
        found_titles.append(str(title))

    found_titles.sort(key=lambda text: text.replace('*', '').lower().strip())
    save_list_to_file(found_titles, 'out/Многокорневые слова БС.txt')
def compare_homonyms():
    """Split BS homonyms by whether a BG homonym matches them.

    Every line of ``src_dict/Омонимы БГ без этим. примечаний.txt`` is
    reduced to name, idf, info and note.  Every line of
    ``src_dict/О-мы БС не совпадают с Повторами.txt`` is reduced the same
    way, with ``'.* '`` in the note rewritten to ``'* '`` so that both
    sides use the same note prefix.  BS lines whose reduced text occurs
    among the BG ones are saved to ``out/О-мы БС совпадают с О-мами БГ.txt``
    and the rest to ``out/О-мы БС не совпадают с О-мами БГ.txt``.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(part for part in parts if part)

    bg_keys = []
    for bg_line in get_string_list_from_file(
            'src_dict/Омонимы БГ без этим. примечаний.txt'):
        bg_form = get_socket_word_form(bg_line)
        bg_keys.append(compact(
            bg_form.name,
            bg_form.idf,
            ' '.join(bg_form.info),
            bg_form.note,
        ))

    matched = []
    unmatched = []
    for bs_line in get_string_list_from_file(
            'src_dict/О-мы БС не совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(bs_line)
        bs_key = compact(
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.* ', '* '),
        )
        target = matched if bs_key in bg_keys else unmatched
        target.append(bs_line)

    save_list_to_file(matched, 'out/О-мы БС совпадают с О-мами БГ.txt')
    save_list_to_file(unmatched, 'out/О-мы БС не совпадают с О-мами БГ.txt')
def compare_replays_in_groups():
    """Split in-group repetitions by whether a BS homonym matches them.

    Every line of ``src_dict/Омонимы БС.txt`` is reduced to name, idf, info
    and note (``'.*'`` removed, stripped).  The file
    ``src_dict/Повторы в группах.txt`` is split into blank-line separated
    groups; the first line of each group and all lines starting with ``!``
    are skipped.  Each remaining line is reduced to name, idf, info and
    note (``*`` removed, stripped) and the original line is classified by
    whether the reduced text occurs among the BS ones.  Both lists are
    sorted case-insensitively, ignoring ``*``, and saved to
    ``out/19.1 Совпадающие.txt`` and ``out/19.1 Не совпадающие.txt``.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(part for part in parts if part)

    def sort_key(text):
        return text.replace('*', '').strip().lower()

    bs_keys = []
    for bs_line in get_string_list_from_file('src_dict/Омонимы БС.txt'):
        bs_form = get_bs_title_word_form(bs_line)
        bs_keys.append(compact(
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.*', '').strip(),
        ))

    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        raw_text = f_in.read()

    matched = []
    unmatched = []
    for chunk in raw_text.split('\n\n'):
        # The first line of a group is skipped, as are '!' lines.
        for line in chunk.strip().split('\n')[1:]:
            if line.startswith('!'):
                continue
            form = get_socket_word_form(line)
            line_key = compact(
                form.name,
                form.idf,
                ' '.join(form.info),
                form.note.replace('*', '').strip(),
            )
            if line_key in bs_keys:
                matched.append(line)
            else:
                unmatched.append(line)

    save_list_to_file(sorted(matched, key=sort_key),
                      'out/19.1 Совпадающие.txt')
    save_list_to_file(sorted(unmatched, key=sort_key),
                      'out/19.1 Не совпадающие.txt')
def check_socket_bs():
    """Report multi-root words whose name is absent from the BS list.

    Names are parsed from the lines of ``out/Многокорневые слова БС.txt``.
    Every non-empty cell after the first column of
    ``out/Многокорневые слова.csv`` is parsed; when its name is not among
    those names the form is printed and its text is collected.  The
    collected texts are saved to ``out/Не найденные в БС.txt``.

    NOTE(review): an earlier function with this same name appears in this
    source; if both live in one module this later one replaces it —
    confirm which is intended.
    """
    csv_rows = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    known_bs_names = [
        get_bs_title_word_form(line).name
        for line in get_string_list_from_file(
            'out/Многокорневые слова БС.txt')
    ]

    missing = []
    for row in csv_rows:
        # The first column is skipped; the rest hold the word cells.
        for column in list(row)[1:]:
            cell = row[column]
            if not cell:
                continue
            candidate = get_socket_word_form(cell)
            if candidate.name in known_bs_names:
                continue
            print(candidate)
            missing.append(str(candidate))

    save_list_to_file(missing, 'out/Не найденные в БС.txt')
def parsing_replays_in_groups():
    """Split in-group repetitions into repeating and unique lines.

    The file ``out/Повторы в группах.txt`` is split into blank-line
    separated groups; the first line of each group and all lines starting
    with ``!`` are skipped.  Each remaining line is parsed and reduced to
    name, idf and info.  Forms whose reduced text occurs more than once go
    to ``out/Повторы в группах. Повторяющиеся строки.txt`` and the others
    to ``out/Повторы в группах. Уникальные строки.txt``.  Both lists hold
    the full text of the form and are sorted case-insensitively, ignoring
    ``*``.
    """
    # Local import keeps the block self-contained.
    from collections import Counter

    def sort_key(text):
        return text.replace('*', '').strip().lower()

    with open('out/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]

    word_forms = []
    for group in groups:
        for line in group.split('\n')[1:]:
            if not line.startswith('!'):
                word_forms.append(get_socket_word_form(line))

    clear_lines = [
        ' '.join(filter(None, [x.name, x.idf, ' '.join(x.info)]))
        for x in word_forms
    ]
    # One counting pass instead of list.count() per element.
    occurrences = Counter(clear_lines)

    repeating_lines = []
    unique_lines = []
    for form, clear_form in zip(word_forms, clear_lines):
        if occurrences[clear_form] > 1:
            repeating_lines.append(str(form))
        else:
            unique_lines.append(str(form))

    save_list_to_file(
        sorted(repeating_lines, key=sort_key),
        'out/Повторы в группах. Повторяющиеся строки.txt')
    save_list_to_file(
        sorted(unique_lines, key=sort_key),
        'out/Повторы в группах. Уникальные строки.txt')
def check_unique_strings():
    """Check repetition lines against the BS titles (rule item 4).

    Each BS title form from ``src_dict/БС 20.02.21.txt`` is reduced to
    name, idf and info.  Two input files are then processed in turn, the
    repeating lines and the unique lines.  A line is written to the
    matching "П.4 Правил соблюдается" file when its reduced text occurs
    exactly once among the BS titles, and to the "П.4 Правил не
    соблюдается" file otherwise.
    """
    # Local import keeps the block self-contained.
    from collections import Counter

    def clear_form(form):
        # Name, idf and info only; empty parts are dropped.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
        ]))

    word_forms_bases = read_src_bs('src_dict/БС 20.02.21.txt')
    # One counting pass instead of list.count() for every checked line.
    bs_title_counts = Counter(
        clear_form(x.title_word_form) for x in word_forms_bases)

    # (input file, rule respected, rule not respected)
    checks = (
        # Repeating lines
        (
            'src_dict/Повторы в группах. Повторяющиеся строки.txt',
            'out/Повторы в группах. Повторяющиеся строки. П.4 Правил соблюдается.txt',
            'out/Повторы в группах. Повторяющиеся строки. П.4 Правил не соблюдается.txt',
        ),
        # Unique lines
        (
            'src_dict/Повторы в группах. Уникальные строки.txt',
            'out/Повторы в группах. Уникальные строки. П.4 Правил соблюдается.txt',
            'out/Повторы в группах. Уникальные строки. П.4 Правил не соблюдается.txt',
        ),
    )

    for src_path, resp_path, not_resp_path in checks:
        lines_resp = []
        lines_not_resp = []
        for line in get_string_list_from_file(src_path):
            str_form = clear_form(get_socket_word_form(line))
            # A missing key counts as 0 in a Counter.
            if bs_title_counts[str_form] == 1:
                lines_resp.append(line)
            else:
                lines_not_resp.append(line)
        save_list_to_file(lines_resp, resp_path)
        save_list_to_file(lines_not_resp, not_resp_path)
def compare_dicts():
    """Compare the ordinary-word lists of BG and BS and save three reports.

    Lines of ``out/Обычные слова БГ.txt`` and ``out/Обычные слова БС.txt``
    are reduced to name, idf, info and note.  The note loses its first two
    characters on the BG side and its first three on the BS side
    (presumably the ``'* '`` / ``'.* '`` prefixes — confirm against the
    data).  Outputs:

    * ``out/Строки совпадают.txt`` — BG lines whose reduced text also
      occurs on the BS side;
    * ``out/Уникальные строки БГ.txt`` — the remaining BG lines;
    * ``out/Уникальные строки БС.txt`` — BS lines whose reduced text does
      not occur on the BG side.
    """

    def compare_key(form, note_offset):
        # Reduced comparison text; ``note_offset`` skips the note prefix.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note[note_offset:],
        ]))

    ordinary_words_bg_list = list(
        get_string_list_from_file('out/Обычные слова БГ.txt'))
    ordinary_words_bs_list = list(
        get_string_list_from_file('out/Обычные слова БС.txt'))

    # Keys are computed once and kept parallel to the raw lines.
    bg_compare_forms = [
        compare_key(get_socket_word_form(x), 2)
        for x in ordinary_words_bg_list
    ]
    bs_compare_forms = [
        compare_key(get_bs_title_word_form(x), 3)
        for x in ordinary_words_bs_list
    ]
    intersection = set(bg_compare_forms) & set(bs_compare_forms)

    matching_lines = []
    bg_unique = []
    for bg_string, compare_form in zip(ordinary_words_bg_list,
                                       bg_compare_forms):
        if compare_form in intersection:
            matching_lines.append(bg_string)
        else:
            bg_unique.append(bg_string)

    # Unmatched BS lines go into ``bs_unique`` (not ``matching_lines``).
    bs_unique = [
        bs_string
        for bs_string, compare_form in zip(ordinary_words_bs_list,
                                           bs_compare_forms)
        if compare_form not in intersection
    ]

    save_list_to_file(matching_lines, 'out/Строки совпадают.txt')
    save_list_to_file(bg_unique, 'out/Уникальные строки БГ.txt')
    save_list_to_file(bs_unique, 'out/Уникальные строки БС.txt')
def ordinary_words_bg():
    """Save the visible BG word forms that are in no special list.

    An exclusion list of strings is assembled from four sources:

    * every non-empty cell of ``src_dict/Многокорневые слова.csv``;
    * the lines of ``src_dict/Повторы в группах.txt`` (blank-line separated
      groups, first line of each group and ``!`` lines skipped);
    * the lines of ``src_dict/Омонимы БГ.txt``, re-joined from the parsed
      fields;
    * the lines of ``src_dict/Слова, омонимичные многокорневым словам.txt``,
      re-joined the same way.

    Every visible word form of ``src_dict/БГ 01.03.21.txt`` whose text is
    not in the exclusion list is kept; the kept forms are sorted, converted
    to text and saved to ``out/Обычные слова БГ.txt``.
    """

    def full_text(form):
        # Re-join all fields of a parsed form, dropping the empty ones.
        fields = (
            form.invisible,
            form.name,
            form.root_index,
            form.idf,
            ' '.join(form.info),
            form.note,
            form.etml_note,
        )
        return ' '.join(field for field in fields if field)

    bg_groups = list(read_src_socket_bs('src_dict/БГ 01.03.21.txt'))
    excluded = []

    # Multi-root words
    csv_rows = get_dicts_from_csv_file('src_dict/Многокорневые слова.csv')
    multi_root_cells = [
        row[column]
        for row in csv_rows
        for column in list(row)
        if row[column]
    ]
    excluded.extend(multi_root_cells)

    # Repetitions in groups
    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        raw_text = f_in.read()
    group_lines = []
    for chunk in raw_text.split('\n\n'):
        for line in chunk.strip().split('\n')[1:]:
            if line.startswith('!'):
                continue
            group_lines.append(line)
    excluded.extend(group_lines)

    # BG homonyms
    homonym_texts = [
        full_text(get_socket_word_form(line))
        for line in get_string_list_from_file('src_dict/Омонимы БГ.txt')
    ]
    excluded.extend(homonym_texts)

    # Words homonymous to multi-root words
    homonymous_texts = [
        full_text(get_socket_word_form(line))
        for line in get_string_list_from_file(
            'src_dict/Слова, омонимичные многокорневым словам.txt')
    ]
    excluded.extend(homonymous_texts)

    # Ordinary BG words
    kept_forms = []
    for bg_group in bg_groups:
        for sub_group in bg_group.sub_groups:
            for form in sub_group.socket_word_forms:
                if form.invisible:
                    continue
                if str(form) in excluded:
                    continue
                kept_forms.append(form)

    kept_forms.sort()
    save_list_to_file([str(form) for form in kept_forms],
                      'out/Обычные слова БГ.txt')
def get_diff_lists():
    """Compare the ordinary-word lists of BS and BG and save three reports.

    Lines of ``out/Обычные слова БС.txt`` are parsed as BS title forms and
    reduced to name, idf, info and note (``'.*'`` removed, stripped).
    Lines of ``out/Обычные слова БГ.txt`` are parsed as BG forms and
    reduced to name, idf, info and the note without its first two
    characters.  Outputs, each holding the text of the parsed forms in
    input order:

    * ``out/Строки совпадают.txt`` — BS forms whose reduced text also
      occurs on the BG side;
    * ``out/Уникальные строки БС.txt`` — the remaining BS forms;
    * ``out/Уникальные строки БГ.txt`` — BG forms whose reduced text does
      not occur on the BS side.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(filter(None, parts))

    # Each raw line is parsed once and its form kept next to its key.
    # The keys are normalised text and must not be parsed again: a second
    # normalisation would, for example, cut two more characters off a BG
    # note and break the comparison.
    bs_forms = [
        get_bs_title_word_form(x)
        for x in get_string_list_from_file('out/Обычные слова БС.txt')
    ]
    bs_keys = [
        compact(
            x.name,
            x.idf,
            ' '.join(x.info),
            x.note.replace('.*', '').strip(),
        )
        for x in bs_forms
    ]

    bg_forms = [
        get_socket_word_form(x)
        for x in get_string_list_from_file('out/Обычные слова БГ.txt')
    ]
    bg_keys = [
        compact(
            x.name,
            x.idf,
            ' '.join(x.info),
            x.note[2:],
        )
        for x in bg_forms
    ]

    bs_key_set = set(bs_keys)
    bg_key_set = set(bg_keys)

    # Forms are saved as text, like every other save_list_to_file call in
    # this source.  NOTE(review): assumes save_list_to_file expects
    # strings — confirm against its definition.
    matching_forms = []
    unique_bs_forms = []
    for title_form, key in zip(bs_forms, bs_keys):
        if key in bg_key_set:
            matching_forms.append(str(title_form))
        else:
            unique_bs_forms.append(str(title_form))

    unique_bg_forms = [
        str(socket_form)
        for socket_form, key in zip(bg_forms, bg_keys)
        if key not in bs_key_set
    ]

    save_list_to_file(matching_forms, 'out/Строки совпадают.txt')
    save_list_to_file(unique_bs_forms, 'out/Уникальные строки БС.txt')
    save_list_to_file(unique_bg_forms, 'out/Уникальные строки БГ.txt')
def get_homonyms_bg():
    """Extract BG homonyms and words homonymous to in-group repetitions.

    Works on ``src_dict/БГ 10.04.21.txt``.  Only word forms that are
    neither invisible nor carry a root index are considered, and names are
    compared with ``*`` removed and whitespace stripped.

    1. Names occurring more than once in the whole dictionary are
       collected.
    2. A form is a candidate when its name is one of those and occurs
       exactly once among the visible forms of its own group.  Candidates
       are recorded as their text, or as ``"<form> < <title>"`` when the
       text differs from the sub-group title.
    3. Candidates are re-parsed.  Those whose parsed name occurs more than
       once among the candidates go to ``out/Омонимы БГ.txt``; the rest go
       to ``out/Слова, омонимичные повторам в группе.txt``.

    Both outputs are sorted case-insensitively, ignoring ``*``.
    """

    def bare_name(word_form):
        return word_form.name.replace('*', '').strip()

    def sort_key(text):
        return text.replace('*', '').strip().lower()

    socket_group_list = list(read_src_socket_bs('src_dict/БГ 10.04.21.txt'))

    # Step 1: names shared by at least two eligible forms anywhere.
    name_counts = Counter(
        bare_name(word_form)
        for socket_group in socket_group_list
        for sub_group in socket_group.sub_groups
        for word_form in sub_group.socket_word_forms
        if not word_form.invisible and not word_form.root_index
    )
    socket_names = {name for name, count in name_counts.items() if count > 1}

    # Step 2: candidates that are unique inside their own group.
    all_homonyms = []
    for socket_group in socket_group_list:
        # Counted once per group instead of list.count() per form.
        group_counts = Counter(
            bare_name(x)
            for x in socket_group.socket_word_forms
            if not x.invisible
        )
        for sub_group in socket_group.sub_groups:
            title_word_form = sub_group.title_word_form
            for word_form in sub_group.socket_word_forms:
                if word_form.invisible or word_form.root_index:
                    continue
                raw_name = bare_name(word_form)
                if group_counts[raw_name] != 1:
                    continue
                if raw_name not in socket_names:
                    continue
                form_text = str(word_form)
                title_text = str(title_word_form)
                if form_text == title_text:
                    all_homonyms.append(form_text)
                else:
                    all_homonyms.append(' < '.join([form_text, title_text]))

    # Step 3: parse each candidate once and count the parsed names.
    parsed_names = [get_socket_word_form(x).name for x in all_homonyms]
    parsed_counts = Counter(parsed_names)

    homonyms = []
    homonymous_repetitions = []
    for homonym, parsed_name in zip(all_homonyms, parsed_names):
        if parsed_counts[parsed_name] > 1:
            homonyms.append(homonym)
        else:
            homonymous_repetitions.append(homonym)

    homonyms = sorted(homonyms, key=sort_key)
    homonymous_repetitions = sorted(homonymous_repetitions, key=sort_key)
    save_list_to_file(homonyms, 'out/Омонимы БГ.txt')
    save_list_to_file(homonymous_repetitions,
                      'out/Слова, омонимичные повторам в группе.txt')
def get_homonyms_bs():
    """Split BS homonyms by whether they match an in-group repetition.

    ``src_dict/Повторы в группах.txt`` is split into blank-line separated
    groups whose first line is skipped.  A line starting with ``!`` sets
    the current sub-group title name; every other line yields two
    comparison strings:

    * name, idf, info and the current sub-group title name (for homonyms
      with a special note, ``".* <"``);
    * name, idf, info and the note with ``*`` removed and stripped (for
      homonyms with a plain note, ``".*"``).

    Each line of ``src_dict/Омонимы БС.txt`` is then reduced according to
    whether its note starts with ``'.* <'`` and is written to one of four
    files under ``out/``: with or without special note, matching or not
    matching a repetition.
    """

    def compact(*parts):
        # Join only the non-empty parts, separated by single spaces.
        return ' '.join(filter(None, parts))

    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    # Sets: these collections are only used for membership tests.
    replays_in_groups_spec_note = set()  # (".* <")
    replays_in_groups = set()  # (".*")
    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = [x.strip() for x in f_in.read().split('\n\n')]

    for group in groups:
        # Reset per group so a line before the first '!' title neither
        # raises NameError nor inherits the previous group's title.
        title_name_sub_group = None
        for line in group.split('\n')[1:]:
            if line.startswith('!'):
                title_name_sub_group = get_socket_word_form(
                    line.replace('!', '').strip()).name
                continue
            word_form = get_socket_word_form(line)
            replays_in_groups_spec_note.add(compact(
                word_form.name,
                word_form.idf,
                ' '.join(word_form.info),
                title_name_sub_group,
            ))
            replays_in_groups.add(compact(
                word_form.name,
                word_form.idf,
                ' '.join(word_form.info),
                word_form.note.replace('*', '').strip(),
            ))

    homonyms_spec_note_relevant = []
    homonyms_spec_note_not_relevant = []
    homonyms_relevant = []
    homonyms_not_relevant = []
    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        if title_form.note.startswith('.* <'):
            # Homonyms with a special note (".* <")
            string_form = compact(
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                title_form.note.replace('.* <', '').strip(),
            )
            if string_form in replays_in_groups_spec_note:
                homonyms_spec_note_relevant.append(homonym)
            else:
                homonyms_spec_note_not_relevant.append(homonym)
        else:
            # Homonyms WITHOUT a special note (".* <")
            string_form = compact(
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                title_form.note.replace('.*', '').strip(),
            )
            if string_form in replays_in_groups:
                homonyms_relevant.append(homonym)
            else:
                homonyms_not_relevant.append(homonym)

    save_list_to_file(homonyms_spec_note_relevant,
                      'out/О-мы БС спец. прим. совпадают с Повторами.txt')
    save_list_to_file(homonyms_spec_note_not_relevant,
                      'out/О-мы БС спец. прим. не совпадают с Повторами.txt')
    save_list_to_file(homonyms_relevant,
                      'out/О-мы БС совпадают с Повторами.txt')
    save_list_to_file(homonyms_not_relevant,
                      'out/О-мы БС не совпадают с Повторами.txt')