def spell_check_split_words(self, indices, doc, language, register=-1):
    """Split misspelled words based on spell-checker suggestions.

    Using this is usually not a good idea unless you have an insane
    dictionary that contains all possible compound words in `language`.

    `indices` lists the subtitles to process; a false value (``None`` or
    an empty sequence) means all indices from :meth:`get_all_indices`.
    `doc` is passed to ``subtitle.get_text`` and :meth:`replace_texts` to
    select the text that is read and replaced. `register` is forwarded to
    :meth:`replace_texts` and :meth:`set_action_description`.

    Runs of spaces are collapsed to a single space before checking, so
    texts that get replaced also have their spacing normalized. Texts in
    which no word was split are left untouched. Return ``None``.

    Raise :exc:`enchant.Error` if dictionary instantiation fails.
    """
    new_indices = []
    new_texts = []
    re_multispace = re.compile(r" +")
    checker = self._get_enchant_checker(language)
    for index in indices or self.get_all_indices():
        subtitle = self.subtitles[index]
        # Keep the space-normalized text as a fixed baseline so that the
        # comparison after the loop detects only changes made by splitting.
        original = unicode(re_multispace.sub(" ", subtitle.get_text(doc)))
        checker.set_text(original)
        while True:
            try:
                checker.next()
            except StopIteration:
                break
            word = checker.word
            if word.capitalize() == word:
                # Skip capitalized words, which are usually names
                # and thus not always found in dictionaries.
                continue
            # Accept only suggestions that differ from the word solely
            # by inserted spaces (and that do not start with a space).
            candidates = [
                suggestion for suggestion in checker.suggest()
                if suggestion.find(" ") > 0
                and suggestion.replace(" ", "") == word
            ]
            # Split only if the choice is unambiguous, i.e. exactly one
            # such suggestion was offered.
            if len(candidates) != 1:
                continue
            text = checker.get_text()
            a = checker.wordpos
            z = a + len(word)
            checker.set_text(text[:a] + candidates[0] + text[z:])
        new_text = unicode(checker.get_text())
        if new_text != original:
            new_indices.append(index)
            new_texts.append(new_text)
    if not new_indices:
        return
    self.replace_texts(new_indices, doc, new_texts, register=register)
    description = _("Splitting words by spell-check suggestions")
    self.set_action_description(register, description)
def spell_check_join_words(self, indices, doc, language, register=-1):
    """Join misspelled words based on spell-checker suggestions.

    For each misspelled word that has a single space before or after it,
    try removing that space and re-check the result with a second checker.
    The join is applied only if exactly one of the two directions yields
    a result that is not reported as misspelled.

    `indices` lists the subtitles to process; a false value (``None`` or
    an empty sequence) means all indices from :meth:`get_all_indices`.
    `doc` is passed to ``subtitle.get_text`` and :meth:`replace_texts` to
    select the text that is read and replaced. `register` is forwarded to
    :meth:`replace_texts` and :meth:`set_action_description`.

    Runs of spaces are collapsed to a single space before checking, so
    texts that get replaced also have their spacing normalized. Texts in
    which no words were joined are left untouched. Return ``None``.

    Raise :exc:`enchant.Error` if dictionary instantiation fails.
    """
    new_indices = []
    new_texts = []
    re_multispace = re.compile(r" +")
    checker = self._get_enchant_checker(language)
    # A second checker is used for trial joins so that probing does not
    # disturb the text and position of the main checker.
    seeker = self._get_enchant_checker(language)
    for index in indices or self.get_all_indices():
        subtitle = self.subtitles[index]
        # Keep the space-normalized text as a fixed baseline. Comparing
        # against a variable that is rebound on every misspelled word
        # would lose joins whenever another misspelled word is visited
        # after the last join.
        original = unicode(re_multispace.sub(" ", subtitle.get_text(doc)))
        checker.set_text(original)
        while True:
            try:
                checker.next()
            except StopIteration:
                break
            text = checker.get_text()
            a = checker.wordpos
            z = a + len(checker.word)
            with_prev = with_next = None
            ok_with_prev = ok_with_next = False
            # NOTE(review): _get_misspelled_indices is defined elsewhere;
            # presumably it returns the character positions covered by
            # misspelled words in the seeker's text -- confirm.
            if checker.leading_context(1) == " ":
                # Drop the space before the word; its first character
                # then sits at position a - 1 of the joined text.
                with_prev = text[:a - 1] + text[a:]
                seeker.set_text(with_prev)
                poss = self._get_misspelled_indices(seeker)
                ok_with_prev = (a - 1) not in poss
            if checker.trailing_context(1) == " ":
                # Drop the space after the word; the word start stays
                # at position a of the joined text.
                with_next = text[:z] + text[z + 1:]
                seeker.set_text(with_next)
                poss = self._get_misspelled_indices(seeker)
                ok_with_next = a not in poss
            # Join backwards or forwards if only one direction,
            # but not both, produces a correctly spelled result.
            if ok_with_prev and not ok_with_next:
                checker.set_text(with_prev)
            elif ok_with_next and not ok_with_prev:
                checker.set_text(with_next)
        new_text = unicode(checker.get_text())
        if new_text != original:
            new_indices.append(index)
            new_texts.append(new_text)
    if not new_indices:
        return
    self.replace_texts(new_indices, doc, new_texts, register=register)
    description = _("Joining words by spell-check suggestions")
    self.set_action_description(register, description)