def save_files(the_lines: typing.Union[typing.List[str], None] = None,
               working_filename: typing.Union[Path, None] = None) -> None:
    """Offer to save the verified text and whichever word lists have been modified.

    Up to three questions are asked, each through SAVE_DATA_MENU:

      1. If the global TEXT_SOURCE_FILE_CHANGED is set and both THE_LINES and WORKING_FILENAME were
         supplied, whether to overwrite WORKING_FILENAME with THE_LINES. If the flag is not set, a
         "no changes" message is printed instead.
      2. If the global ALWAYS_CAPITALIZE_LIST differs from ORIGINAL_ALWAYS_CAPITALIZE_LIST, whether
         to write the list to ALWAYS_CAPITALIZE_LIST_FILENAME.
      3. If the global APOSTROPHE_WORDS differs from ORIGINAL_APOSTROPHE_WORDS, whether to write the
         list to APOSTROPHE_WORDS_FILENAME.

    When a word-list filename has not been set, the user is asked for one with
    file_utils.do_open_dialog(), and the chosen location is stored back in the corresponding global.

    Parameters:
        the_lines           Lines of text to write back to WORKING_FILENAME, or None.
        working_filename    Path of the text file to overwrite, or None.

    Side effects: both global word lists are sorted in place; after a list has been written, its
    ORIGINAL_* baseline is replaced with a snapshot of the saved list.
    """
    global apostrophe_words_filename, always_capitalize_list_filename   # semi-constant module configuration params
    global apostrophe_words, always_capitalize_list
    global original_always_capitalize_list, original_apostrophe_words
    global text_source_file_changed

    if working_filename:
        assert isinstance(working_filename, Path)
    if apostrophe_words_filename:
        assert isinstance(apostrophe_words_filename, Path)
    if always_capitalize_list_filename:
        assert isinstance(always_capitalize_list_filename, Path)

    if text_source_file_changed:
        if the_lines and working_filename:
            choice = comparative_form(multi_choice_menu.menu_choice(
                save_data_menu, 'Overwrite file "%s" with modified text?' % working_filename.name))
            if choice == 'y':
                with working_filename.open('w') as f:
                    f.writelines(the_lines)
    else:
        print('No changes made in this file, moving on ...\n\n')

    always_capitalize_list.sort()       # FIXME! Is this happening when called from a module?
    if always_capitalize_list != original_always_capitalize_list:
        print('\n\n')
        # The filename may not have been chosen yet; don't dereference None just to build the prompt.
        if always_capitalize_list_filename:
            list_name = always_capitalize_list_filename.name
        else:
            list_name = '[no file chosen yet]'
        choice = comparative_form(multi_choice_menu.menu_choice(
            save_data_menu, 'List of always-capitalize words "%s" modified. Save new list?' % list_name))
        if choice == 'y':
            always_capitalize_list_filename = always_capitalize_list_filename or file_utils.do_open_dialog()
            with always_capitalize_list_filename.open('w') as f:
                f.writelines(sorted({comparative_form(line) + '\n' for line in always_capitalize_list}))
            # Snapshot (not alias) the saved state, so that later additions to the live list are
            # still detected as modifications.
            original_always_capitalize_list = list(always_capitalize_list)

    apostrophe_words.sort()
    if apostrophe_words != original_apostrophe_words:
        print('\n\n')
        if apostrophe_words_filename:
            list_name = apostrophe_words_filename.name
        else:
            list_name = '[no file chosen yet]'
        choice = comparative_form(multi_choice_menu.menu_choice(
            save_data_menu, 'List of begin-with-apostrophe words "%s" modified. Save new list?' % list_name))
        if choice == 'y':
            apostrophe_words_filename = apostrophe_words_filename or file_utils.do_open_dialog()
            with apostrophe_words_filename.open('w') as f:
                # Every entry is written with exactly one leading right single quotation mark.
                f.writelines(sorted({'’%s\n' % comparative_form(line).lstrip("’'") for line in apostrophe_words}))
            # Same baseline refresh as for the always-capitalize list.
            original_apostrophe_words = list(apostrophe_words)
def save_files(the_lines, the_filename, the_always_capitalize_list, the_always_capitalize_list_filename):
    """Give the user the option to save the modified-in-place verified text, plus, if modified, the
    list of words always to capitalize.

    Parameters:
        the_lines                               List of lines to be written back to the original file.
        the_filename                            Path/name of the original file to be overwritten.
        the_always_capitalize_list              List of words always to capitalize. Sorted in place.
        the_always_capitalize_list_filename     Location of always-capitalize list. May be empty/None,
                                                in which case the user is asked for a location if
                                                the list is to be saved.

    Returns a tuple:
        (   the [possibly modified] THE_ALWAYS_CAPITALIZE_LIST,
            the [possibly modified] THE_ALWAYS_CAPITALIZE_LIST_FILENAME,
        )

    The word list is considered modified when, after sorting, it differs from the global
    ORIGINAL_ALWAYS_CAPITALIZE_LIST; that global is refreshed once the list has been written.
    """
    global original_always_capitalize_list

    the_menu = OrderedDict([                    # Use this same menu for both questions
        ('Y', "Overwrite the old data"),
        ('N', 'Cancel and lose the changes'),
    ])

    # Name the file we were actually asked to overwrite (the parameter, not some global FILENAME).
    choice = comparative_form(multi_choice_menu.menu_choice(
        the_menu, 'Overwrite file "%s" with modified text?' % os.path.basename(the_filename)))
    if choice == 'y':
        with open(the_filename, 'w') as f:
            f.writelines(the_lines)

    the_always_capitalize_list.sort()
    if the_always_capitalize_list != original_always_capitalize_list:
        print('\n\n')
        # The list location may not be known yet; os.path functions reject None, so only take the
        # base name when there is a path to take it from.
        if the_always_capitalize_list_filename:
            list_name = os.path.basename(the_always_capitalize_list_filename)
        else:
            list_name = '[no file chosen yet]'
        choice = comparative_form(multi_choice_menu.menu_choice(
            the_menu, 'List of always-capitalize words "%s" modified. Save new list?' % list_name))
        if choice == 'y':
            the_always_capitalize_list_filename = the_always_capitalize_list_filename or sfp.do_open_dialog()
            with open(the_always_capitalize_list_filename, 'w') as f:
                f.writelines(sorted({comparative_form(line) + '\n' for line in the_always_capitalize_list}))
            # Record what is now on disk (as a copy) so an unchanged list is not offered for saving again.
            original_always_capitalize_list = list(the_always_capitalize_list)

    return the_always_capitalize_list, the_always_capitalize_list_filename
def check_word_capitalization(tagged_sentence, word_number, allow_always_correct=False):
    """Ask the user whether the capitalization of one word in a sentence should be changed.

    TAGGED_SENTENCE is a sequence of (word, tag) pairs; WORD_NUMBER is the index of the word in
    question. The sentence is shown with that word in ALL CAPS for context. When
    ALLOW_ALWAYS_CORRECT is true, the menu also offers to always capitalize the word.

    Returns True if the capitalization NEEDS TO BE ALTERED; False if capitalization IS ALREADY
    CORRECT. A word already in the always-capitalize list returns True without asking.

    Choosing to quit offers to save via save_files() and then exits the program.

    This routine modifies the global list always_capitalize_list.
    """
    global the_lines, filename, always_capitalize_list, always_capitalize_list_filename  # In case we abort and save.

    the_word = tagged_sentence[word_number][0]
    if comparative_form(the_word) in always_capitalize_list:
        return True

    # Rebuild the sentence for display, shouting the word under scrutiny. Punctuation is glued to
    # what precedes it; everything else is preceded by a space.
    context_sentence = ''
    for position, (token, _) in enumerate(tagged_sentence):
        if position == word_number:
            token = token.upper()
        template = "%s%s" if token in punc else "%s %s"
        context_sentence = template % (context_sentence, token)

    print()
    starts_upper = the_word[0].isupper()
    if starts_upper:
        verb = "is"
    else:
        verb = "is not"
    question = 'POSSIBLE ERROR DETECTED: the word "%s" %s capitalized. Is this wrong?' % (puncstrip(the_word), verb)
    text_handling.print_indented(question, 2)
    print()
    text_handling.print_indented('CONTEXT: %s\n' % context_sentence, 2)

    # Menu order matters to the display: Y, N, [A], Q.
    the_menu = OrderedDict([
        ('Y', ("Decapitalize" if starts_upper else "Capitalize") + " this word"),
        ('N', 'Leave this word as-is'),
    ])
    if allow_always_correct:
        the_menu['A'] = "Always capitalize this word"
    the_menu['Q'] = 'Quit, with option to save changes'

    choice = comparative_form(multi_choice_menu.menu_choice(the_menu, "What would you like to do?"))

    if choice == 'q':
        save_files(the_lines, filename, always_capitalize_list, always_capitalize_list_filename)
        print('\nQuitting ...')
        sys.exit(0)

    if choice == 'a':
        always_capitalize_list.append(comparative_form(the_word))
        # An always-capitalized word needs changing only if it is not capitalized right now.
        return not starts_upper

    return choice.lower() == 'y'
def check_word_capitalization(tagged_sentence: typing.List[typing.Tuple[str, str]],     # CHECK: is this correct?
                              word_number: int,
                              allow_always_correct: bool,
                              # The rest of these parameters are just in case we have to save while quitting in the
                              # middle of the run. They can be None; if either is, saving is not an option that's
                              # offered to the user.
                              the_lines: typing.Union[typing.List[str], None] = None,
                              working_filename: typing.Union[Path, None] = None,
                              ) -> bool:
    """Give the user a choice of whether to correct the capitalization of word number WORD_NUMBER
    in TAGGED_SENTENCE or not to correct the capitalization of that word. The "tagged" in
    TAGGED_SENTENCE means "POS-tagged by NLTK."

    If ALLOW_ALWAYS_CORRECT is True, the user is given the option to always capitalize this word;
    otherwise, the user is not given this option.

    Depending on the word, the menu may also offer to accept the word as one that legitimately
    begins with an apostrophe, or to turn a leading straight apostrophe into an opening quote. After
    either of those, the question is asked again for the same word. If THE_LINES and
    WORKING_FILENAME are both supplied, the user may also quit; save_files() is then called (without
    the text) and the program exits.

    Returns True if the word's capitalization should be changed: that is, if the word is already in
    the always-capitalize list, if the user answered yes, or if the user chose "always capitalize"
    for a word that is not currently capitalized. Returns False otherwise.

    Side effects: may append to the globals ALWAYS_CAPITALIZE_LIST and APOSTROPHE_WORDS, and may
    replace entry WORD_NUMBER of TAGGED_SENTENCE in place.
    """
    global text_source_file_changed
    global always_capitalize_list, apostrophe_words

    if working_filename:
        assert isinstance(working_filename, Path)

    the_word = tagged_sentence[word_number][0]
    if comparative_form(the_word) in always_capitalize_list:
        return True

    # First, reassemble the sentence, except capitalize the entire word whose capitalization is in question
    context_sentence = ''
    for position, (w, _) in enumerate(tagged_sentence):
        if position == word_number:
            w = w.upper()
        context_sentence = "%s%s" % (context_sentence, w) if w in punc else "%s %s" % (context_sentence, w)

    print()
    capitalized = th.is_capitalized(the_word)
    verb = "is" if capitalized else "is not"
    question = 'POSSIBLE ERROR DETECTED: the word "%s" %s capitalized. Is this wrong?' % (comparative_form(the_word), verb)
    th.print_indented(question, 2)
    print()
    th.print_indented('CONTEXT: %s\n' % context_sentence, 2)

    the_menu = OrderedDict([])
    the_menu['Y'] = ("Decapitalize" if capitalized else "Capitalize") + " this word"
    the_menu['N'] = 'Leave this word as-is'
    if allow_always_correct:
        the_menu['A'] = "Always capitalize this word"
    # NOTE(review): the 'D' branch below stores words WITH a leading apostrophe, while this test
    # looks for the stripped form -- confirm how APOSTROPHE_WORDS entries are meant to be stored.
    if th.begins_with_apostrophe(the_word) and comparative_form(the_word).strip("’'") not in apostrophe_words:
        the_menu['D'] = "Allow this word to begin with an apostrophe"
    if the_word.strip().startswith("'") and comparative_form(the_word).strip("’'") not in apostrophe_words:
        the_menu['C'] = "Correct initial apostrophe ( ' ) to opening quote ( ‘ )"
    if the_lines and working_filename:
        the_menu['Q'] = 'Quit, with option to save training data (but not modified text)'

    choice = comparative_form(multi_choice_menu.menu_choice(the_menu, "What would you like to do?"))
    if choice == 'a':
        always_capitalize_list += [comparative_form(the_word)]
        # From now on this word must be capitalized, so it needs changing only if it currently is not.
        choice = "n" if capitalized else "y"
    elif choice == 'q':
        # FIXME: we should really be able to save the modified source text.
        # The text file hasn't been fully reassembled yet, so we can't save it! Pass other parameters, though.
        # This branch only available if THE_LINES and WORKING_FILENAME were specified as parameters.
        save_files()
        print('\nQuitting ...')
        sys.exit(0)
    elif choice == "d":
        apostrophe_words += [the_word[0] + comparative_form(the_word).strip("’'")]     # Add the word to the list ...
        # ... and check again. Pass the save parameters along so that 'Q' stays on the menu.
        return check_word_capitalization(tagged_sentence, word_number, allow_always_correct,
                                         the_lines, working_filename)
    elif choice == "c":
        # NOTE(review): this alters the sentence but does not touch TEXT_SOURCE_FILE_CHANGED --
        # presumably the caller tracks that; confirm.
        tagged_sentence[word_number] = ("‘" + the_word.strip().lstrip("'"), tagged_sentence[word_number][1])
        # And check again, again keeping the save parameters.
        return check_word_capitalization(tagged_sentence, word_number, allow_always_correct,
                                         the_lines, working_filename)

    return choice.lower() == 'y'