def _apply_user_choice(err):
    """Show the current misspelling and apply the user's decision to it.

    `err` is the spell checker positioned on a misspelled word. The word and
    its suggestions are printed, then the user is asked to either ignore the
    word (``i``), type a replacement (``e``) or pick a suggestion by index.
    An answer that is not an integer, or that is out of range for the
    suggestion list, is rejected and the question is asked again instead of
    aborting the whole session with ValueError/IndexError.
    """
    suggestions = err.suggest()
    print(err.word)
    print(suggestions)
    while True:
        answer = raw_input("provide 0-index int of correct word or i to ignore, e to edit ")
        if answer == 'i':
            return
        if answer == 'e':
            err.replace(raw_input(""))
            return
        try:
            replacement = suggestions[int(answer)]
        except (ValueError, IndexError):
            print("invalid choice: %r" % (answer,))
            continue
        err.replace(replacement)
        return


def initialize():
    """Interactively spell-check one column of a CSV file.

    Prompts for the CSV path, the column ("FIELD") to check and the name of
    the output file. Every row's value in that column is formatted as a
    string and run through an ``en_US`` enchant SpellChecker; for each
    misspelling the user chooses what to do. The corrected texts, in row
    order, are finally passed together with the output name to
    ``write_fixed_file`` (defined elsewhere).

    NOTE: uses ``raw_input`` and therefore targets Python 2.
    """
    chk_file = raw_input("What is the file to spellcheck? ")
    field = raw_input("What FIELD do you want to spellcheck? ")
    s_file = raw_input("What is name of final file? ")

    checker = enchant.checker.SpellChecker("en_US")
    file_data = pd.read_csv(chk_file)
    fields = list(file_data.apply(lambda x: '%s' % (x[field]), axis=1))
    # HTML stripping via strip_html(fields) was considered here but is
    # currently disabled.

    corrected_text = []
    for data_field in fields:
        checker.set_text(str(data_field))
        # Iterating the checker yields the checker itself, positioned on each
        # misspelled word in turn.
        for err in checker:
            _apply_user_choice(err)
        corrected_text.append(checker.get_text())
    write_fixed_file(corrected_text, s_file)
def do_check(checker, to_check):
    """Interactively spell-check every string in `to_check`, in place.

    For each text a command-line checker session is run on `checker`, and
    the text the checker holds afterwards is written back to the same
    position of `to_check`.

    The position comes from ``enumerate`` rather than
    ``to_check.index(text)``: ``index`` returns the first equal element, so
    with duplicate strings the corrected text could overwrite the wrong
    entry and leave the current one untouched. It also avoids a linear
    search per item.

    Returns None; `to_check` is modified in place.
    """
    for position, text in enumerate(to_check):
        checker.set_text(text)
        # A fresh command-line front end per text, as in the original code.
        cmdline_checker = CmdLineChecker()
        cmdline_checker.set_checker(checker)
        cmdline_checker.run()
        to_check[position] = checker.get_text()
def spell_check_split_words(self, indices, doc, language, register=-1):
    """
    Split misspelled words where the spell-checker suggests a split.

    A misspelled word is replaced only when exactly one suggestion contains
    a space (not as its first character) and equals the word once its
    spaces are removed. Words equal to their own ``str.capitalize()`` form
    are skipped. Runs of spaces in each text are collapsed to one space
    before checking. If `indices` is empty or ``None``, the indices from
    ``get_all_indices`` are used.

    Using this is usually not a good idea unless the dictionary contains
    all possible compound words in `language`.
    Raise :exc:`enchant.Error` if dictionary instantiation fails.
    """
    changed_indices = []
    changed_texts = []
    multispace = re.compile(r" +")
    checker = self._get_enchant_checker(language)
    for index in indices or self.get_all_indices():
        before = multispace.sub(" ", self.subtitles[index].get_text(doc))
        checker.set_text(before)
        while True:
            try:
                next(checker)
            except StopIteration:
                break
            word = checker.word
            if word == word.capitalize():
                # Capitalized words are usually names, which dictionaries
                # often lack; leave them alone.
                continue
            candidates = [
                candidate for candidate in checker.suggest()
                if candidate.find(" ") > 0
                and candidate.replace(" ", "") == word
            ]
            # Only an unambiguous split is applied.
            if len(candidates) != 1:
                continue
            # 'before' is rebound to the text just prior to this split, so
            # the comparison after the loop detects that a split happened.
            before = checker.get_text()
            start = checker.wordpos
            end = start + len(word)
            checker.set_text(before[:start] + candidates[0] + before[end:])
        after = checker.get_text()
        if after != before:
            changed_indices.append(index)
            changed_texts.append(after)
    if changed_indices:
        self.replace_texts(changed_indices, doc, changed_texts, register=register)
        description = _("Splitting words by spell-check suggestions")
        self.set_action_description(register, description)
def spell_check_join_words(self, indices, doc, language, register=-1):
    """
    Join misspelled words based on spell-checker suggestions.

    For every misspelled word, joining it with the previous word and with
    the next word (by removing the single space between them) is tried on
    a second checker. A join is applied when exactly one of the two
    directions yields a result whose join position is not among the indices
    returned by ``_get_misspelled_indices``. Runs of spaces in each text are
    collapsed to one space before checking. If `indices` is empty or
    ``None``, the indices from ``get_all_indices`` are used.

    A text is reported as changed when the final text differs from the
    space-normalized starting text, so a join is recorded even if a later
    misspelled word in the same text is left untouched.

    Raise :exc:`enchant.Error` if dictionary instantiation fails.
    """
    new_indices = []
    new_texts = []
    re_multispace = re.compile(r" +")
    checker = self._get_enchant_checker(language)
    # Separate checker for trial joins so that the main checker's
    # iteration state is not disturbed.
    seeker = self._get_enchant_checker(language)
    for index in indices or self.get_all_indices():
        subtitle = self.subtitles[index]
        start_text = re_multispace.sub(" ", subtitle.get_text(doc))
        checker.set_text(start_text)
        while True:
            try:
                next(checker)
            except StopIteration:
                break
            text = checker.get_text()
            a = checker.wordpos
            z = a + len(checker.word)
            joined_with_prev = text[:a - 1] + text[a:]
            joined_with_next = text[:z] + text[z + 1:]
            ok_with_prev = ok_with_next = False
            if checker.leading_context(1) == " ":
                seeker.set_text(joined_with_prev)
                poss = self._get_misspelled_indices(seeker)
                ok_with_prev = (a - 1) not in poss
            if checker.trailing_context(1) == " ":
                seeker.set_text(joined_with_next)
                poss = self._get_misspelled_indices(seeker)
                ok_with_next = a not in poss
            # Join backwards or forwards if only one direction,
            # but not both, produce a correctly spelled result.
            if ok_with_prev and not ok_with_next:
                checker.set_text(joined_with_prev)
            elif ok_with_next and not ok_with_prev:
                checker.set_text(joined_with_next)
        new_text = checker.get_text()
        # Compare against the starting text, not the loop variable 'text',
        # which only reflects the state at the last misspelled word.
        if new_text != start_text:
            new_indices.append(index)
            new_texts.append(new_text)
    if not new_indices:
        return
    self.replace_texts(new_indices, doc, new_texts, register=register)
    description = _("Joining words by spell-check suggestions")
    self.set_action_description(register, description)