Esempio n. 1
0
def initialize():
    """
    Interactively spell-check one column of a CSV file and save the result.

    Prompts for the CSV path, the column ("FIELD") to check and the output
    file name. Each word flagged by the checker is printed with its list of
    suggestions, and the user answers with:

    * ``i`` to leave the word untouched,
    * ``e`` to type a replacement on the next line,
    * a 0-based index into the printed suggestion list.

    Any other answer (not an integer, or an index outside the list) is
    rejected and the prompt is repeated instead of aborting the session.
    The corrected texts are handed to ``write_fixed_file`` along with the
    requested output name.
    """
    chk_file = raw_input("What is the file to spellcheck?    ")
    field = raw_input("What FIELD do you want to spellcheck?   ")
    s_file = raw_input("What is name of final file?    ")

    checker = enchant.checker.SpellChecker("en_US")

    file_data = pd.read_csv(chk_file)

    # One string per row, taken from the requested column.
    fields = list(file_data.apply(lambda x: '%s' % (x[field]), axis=1))

    corrected_text = []
    for data_field in fields:
        checker.set_text(str(data_field))
        for err in checker:
            # Ask for suggestions once so the printed list and the list the
            # user's index is applied to are guaranteed to be the same.
            suggestions = err.suggest()
            print(err.word)
            print(suggestions)
            while True:
                correct = raw_input("provide 0-index int of correct word or i to ignore, e to edit ")
                if correct == 'i':
                    break
                if correct == 'e':
                    err.replace(raw_input(""))
                    break
                try:
                    choice = int(correct)
                except ValueError:
                    print("'%s' is not i, e or an integer; try again" % correct)
                    continue
                if 0 <= choice < len(suggestions):
                    err.replace(suggestions[choice])
                    break
                print("%d is not a valid suggestion index; try again" % choice)
        corrected_text.append(checker.get_text())

    write_fixed_file(corrected_text, s_file)
def do_check(checker, to_check):
    """
    Run an interactive command-line spell check over every text in a list.

    Each entry of `to_check` is loaded into `checker`, a fresh
    ``CmdLineChecker`` is run on it, and the entry is replaced in place by
    the checker's resulting text. Nothing is returned; `to_check` itself is
    modified.
    """
    # Write back by position. Looking the slot up with ``list.index(text)``
    # returns the first element equal to `text`, which is the wrong slot
    # when an earlier, already corrected entry has the same value.
    for position, text in enumerate(to_check):
        checker.set_text(text)
        cmdline_checker = CmdLineChecker()
        cmdline_checker.set_checker(checker)
        cmdline_checker.run()
        to_check[position] = checker.get_text()
Esempio n. 3
0
    def spell_check_split_words(self, indices, doc, language, register=-1):
        """
        Split misspelled words based on spell-checker suggestions.

        A misspelled word is replaced by a suggestion only when exactly one
        suggestion contains a space and equals the word once its spaces are
        removed. Misspelled words that look capitalized are left alone.
        If `indices` is empty or ``None``, all subtitles are processed.

        This tends to be useful only with a dictionary that knows every
        compound word of `language`.
        Raise :exc:`enchant.Error` if dictionary instantiation fails.
        """
        multispace = re.compile(r" +")
        checker = self._get_enchant_checker(language)
        changed_indices = []
        changed_texts = []
        for index in (indices or self.get_all_indices()):
            # `current` starts as the whitespace-normalized subtitle text
            # and is later rebound to the text as it was right before the
            # most recent split.
            current = multispace.sub(" ", self.subtitles[index].get_text(doc))
            checker.set_text(current)
            while True:
                try:
                    next(checker)
                except StopIteration:
                    break
                word = checker.word
                if word == word.capitalize():
                    # Capitalized words are usually names, which
                    # dictionaries often do not contain.
                    continue
                candidates = [
                    candidate for candidate in checker.suggest()
                    if candidate.find(" ") > 0
                    and candidate.replace(" ", "") == word
                ]
                # Act only on an unambiguous split that keeps every
                # character of the original word.
                if len(candidates) != 1:
                    continue
                current = checker.get_text()
                start = checker.wordpos
                stop = start + len(word)
                checker.set_text(current[:start] + candidates[0] + current[stop:])
            result = checker.get_text()
            if result != current:
                changed_indices.append(index)
                changed_texts.append(result)
        if not changed_indices:
            return
        self.replace_texts(changed_indices, doc, changed_texts, register=register)
        description = _("Splitting words by spell-check suggestions")
        self.set_action_description(register, description)
Esempio n. 4
0
    def spell_check_join_words(self, indices, doc, language, register=-1):
        """
        Join misspelled words based on spell-checker suggestions.

        For every misspelled word that has a space directly before or after
        it, that space is tentatively removed and the result is re-checked
        with a second checker. The join is kept only when exactly one of the
        two directions passes that re-check. If `indices` is empty or
        ``None``, all subtitles are processed. All changed texts are
        committed with one ``replace_texts`` call; nothing is returned.

        Raise :exc:`enchant.Error` if dictionary instantiation fails.
        """
        new_indices = []
        new_texts = []
        re_multispace = re.compile(r" +")
        checker = self._get_enchant_checker(language)
        # Second checker used for trial joins so that the main checker's
        # text and position are not disturbed.
        seeker = self._get_enchant_checker(language)
        for index in indices or self.get_all_indices():
            subtitle = self.subtitles[index]
            # Keep the normalized starting text for the final comparison.
            # The per-word `text` below is refreshed for every misspelled
            # word, so comparing against it would drop an earlier join
            # whenever a later misspelled word is left unjoined.
            original_text = re_multispace.sub(" ", subtitle.get_text(doc))
            checker.set_text(original_text)
            while True:
                try:
                    next(checker)
                except StopIteration:
                    break
                text = checker.get_text()
                a = checker.wordpos
                z = a + len(checker.word)
                with_prev = with_next = None
                if checker.leading_context(1) == " ":
                    candidate = text[:a - 1] + text[a:]
                    seeker.set_text(candidate)
                    # NOTE(review): presumably the helper returns the text
                    # positions covered by misspelled words -- confirm.
                    if (a - 1) not in self._get_misspelled_indices(seeker):
                        with_prev = candidate
                if checker.trailing_context(1) == " ":
                    candidate = text[:z] + text[z + 1:]
                    seeker.set_text(candidate)
                    if a not in self._get_misspelled_indices(seeker):
                        with_next = candidate
                # Join backwards or forwards if only one direction,
                # but not both, produces a correctly spelled result.
                if with_prev is not None and with_next is None:
                    checker.set_text(with_prev)
                elif with_next is not None and with_prev is None:
                    checker.set_text(with_next)
            new_text = checker.get_text()
            if new_text != original_text:
                new_indices.append(index)
                new_texts.append(new_text)
        if not new_indices:
            return
        self.replace_texts(new_indices, doc, new_texts, register=register)
        description = _("Joining words by spell-check suggestions")
        self.set_action_description(register, description)
Esempio n. 5
0
    def spell_check_split_words(self, indices, doc, language, register=-1):
        """
        Split misspelled words based on spell-checker suggestions.

        A misspelled word is replaced by a suggestion only when exactly one
        suggestion contains a space and equals the word once its spaces are
        removed. Misspelled words that look capitalized are left alone.
        If `indices` is empty or ``None``, all subtitles are processed.

        This tends to be useful only with a dictionary that knows every
        compound word of `language`.
        Raise :exc:`enchant.Error` if dictionary instantiation fails.
        """
        multispace = re.compile(r" +")
        checker = self._get_enchant_checker(language)
        changed_indices = []
        changed_texts = []
        for index in (indices or self.get_all_indices()):
            # `current` starts as the whitespace-normalized subtitle text
            # and is later rebound to the text as it was right before the
            # most recent split.
            current = multispace.sub(" ", self.subtitles[index].get_text(doc))
            checker.set_text(current)
            while True:
                try:
                    next(checker)
                except StopIteration:
                    break
                word = checker.word
                if word == word.capitalize():
                    # Capitalized words are usually names, which
                    # dictionaries often do not contain.
                    continue
                candidates = [
                    candidate for candidate in checker.suggest()
                    if candidate.find(" ") > 0
                    and candidate.replace(" ", "") == word
                ]
                # Act only on an unambiguous split that keeps every
                # character of the original word.
                if len(candidates) != 1:
                    continue
                current = checker.get_text()
                start = checker.wordpos
                stop = start + len(word)
                checker.set_text(current[:start] + candidates[0] + current[stop:])
            result = checker.get_text()
            if result != current:
                changed_indices.append(index)
                changed_texts.append(result)
        if not changed_indices:
            return
        self.replace_texts(changed_indices, doc, changed_texts, register=register)
        description = _("Splitting words by spell-check suggestions")
        self.set_action_description(register, description)
Esempio n. 6
0
    def spell_check_join_words(self, indices, doc, language, register=-1):
        """
        Join misspelled words based on spell-checker suggestions.

        For every misspelled word that has a space directly before or after
        it, that space is tentatively removed and the result is re-checked
        with a second checker. The join is kept only when exactly one of the
        two directions passes that re-check. If `indices` is empty or
        ``None``, all subtitles are processed. All changed texts are
        committed with one ``replace_texts`` call; nothing is returned.

        Raise :exc:`enchant.Error` if dictionary instantiation fails.
        """
        new_indices = []
        new_texts = []
        re_multispace = re.compile(r" +")
        checker = self._get_enchant_checker(language)
        # Second checker used for trial joins so that the main checker's
        # text and position are not disturbed.
        seeker = self._get_enchant_checker(language)
        for index in indices or self.get_all_indices():
            subtitle = self.subtitles[index]
            # Keep the normalized starting text for the final comparison.
            # The per-word `text` below is refreshed for every misspelled
            # word, so comparing against it would drop an earlier join
            # whenever a later misspelled word is left unjoined.
            original_text = re_multispace.sub(" ", subtitle.get_text(doc))
            checker.set_text(original_text)
            while True:
                try:
                    next(checker)
                except StopIteration:
                    break
                text = checker.get_text()
                a = checker.wordpos
                z = a + len(checker.word)
                with_prev = with_next = None
                if checker.leading_context(1) == " ":
                    candidate = text[:a - 1] + text[a:]
                    seeker.set_text(candidate)
                    # NOTE(review): presumably the helper returns the text
                    # positions covered by misspelled words -- confirm.
                    if (a - 1) not in self._get_misspelled_indices(seeker):
                        with_prev = candidate
                if checker.trailing_context(1) == " ":
                    candidate = text[:z] + text[z + 1:]
                    seeker.set_text(candidate)
                    if a not in self._get_misspelled_indices(seeker):
                        with_next = candidate
                # Join backwards or forwards if only one direction,
                # but not both, produces a correctly spelled result.
                if with_prev is not None and with_next is None:
                    checker.set_text(with_prev)
                elif with_next is not None and with_prev is None:
                    checker.set_text(with_next)
            new_text = checker.get_text()
            if new_text != original_text:
                new_indices.append(index)
                new_texts.append(new_text)
        if not new_indices:
            return
        self.replace_texts(new_indices, doc, new_texts, register=register)
        description = _("Joining words by spell-check suggestions")
        self.set_action_description(register, description)