def autocorrect(misspelled):
    """Return the most likely correction for everything up to a double typo.

    Candidate sources are tried in a fixed order and the first non-empty
    one is used: the word itself (through ``common``, then ``exact``, then
    ``known``), its single typos filtered by ``known``, its double typos
    filtered by ``common``, and finally the unmodified input.  The
    candidate with the highest ``NLP_COUNTS`` entry is selected and handed
    to ``get_case`` together with the original input.

    Args:
        misspelled: The word to correct.

    Returns:
        The result of ``get_case(misspelled, correction)`` for the
        selected candidate.
    """
    w = Word(misspelled)
    # ``or`` short-circuits, so typos()/double_typos() are only generated
    # when every earlier source came back empty.
    candidates = (common([misspelled]) or exact([misspelled]) or
                  known([misspelled]) or known(w.typos()) or
                  common(w.double_typos()) or [misspelled])
    # A bare ``NLP_COUNTS.get`` returns None for a word without a count,
    # and Python 3 refuses to order None against ints (or against None).
    # Defaulting to 0 lets such candidates simply rank lowest.
    correction = max(candidates,
                     key=lambda candidate: NLP_COUNTS.get(candidate, 0))
    return get_case(misspelled, correction)
def spell(word):
    """Pick the best correction for ``word``, covering up to double typos.

    Sources are consulted one at a time, in priority order, and the first
    one that produces a non-empty result supplies the candidates.  If all
    of them come back empty the input word is the only candidate.  The
    winner is the candidate with the largest ``NLP_COUNTS[...]`` value; it
    is passed to ``get_case`` along with the original word.
    """
    typo_source = Word(word)

    # Each source is wrapped in a thunk so later (typo-generating) sources
    # are not evaluated unless every earlier one yielded nothing.
    sources = (
        lambda: common([word]),
        lambda: exact([word]),
        lambda: known([word]),
        lambda: known(typo_source.typos()),
        lambda: common(typo_source.double_typos()),
    )
    for produce in sources:
        matches = produce()
        if matches:
            break
    else:
        # Nothing matched anywhere: fall back to the input itself.
        matches = [word]

    def frequency(item):
        return NLP_COUNTS[item]

    best = max(matches, key=frequency)
    return get_case(word, best)
def spell(word):
    """Return the most likely correction for everything up to a double typo.

    Candidate sources are tried in a fixed order and the first non-empty
    one is used: the word itself (through ``common``, then ``exact``, then
    ``known``), its single typos filtered by ``known``, and its double
    typos filtered by ``common``.  This variant does not fall back to the
    input word.

    Args:
        word: The word to correct.

    Returns:
        ``'<UNK>'`` when no source produced a candidate; otherwise the
        result of ``get_case(word, correction)`` for the candidate with
        the highest ``NLP_COUNTS`` entry.
    """
    w = Word(word)
    # ``or`` short-circuits, so typos()/double_typos() are only generated
    # when every earlier source came back empty.
    candidates = (common([word]) or exact([word]) or known([word]) or
                  known(w.typos()) or common(w.double_typos()))
    if not candidates:
        # No plausible correction: return the sentinel as-is (it is not
        # passed through get_case).
        return '<UNK>'
    # A bare ``NLP_COUNTS.get`` returns None for a word without a count,
    # and Python 3 refuses to order None against ints (or against None).
    # Defaulting to 0 lets such candidates simply rank lowest.
    correction = max(candidates,
                     key=lambda candidate: NLP_COUNTS.get(candidate, 0))
    return get_case(word, correction)
def spell(word, lang_sample, file_format='bz'):
    """Return the most likely correction for everything up to a double typo.

    The word/count tables are obtained by calling ``parse`` on every
    invocation.  Candidate sources are then tried in a fixed order and the
    first non-empty one is used: the word itself (through ``common`` with
    the parsed word table, then ``exact``, then ``known``), its single
    typos filtered by ``known``, its double typos filtered by ``common``,
    and finally the unmodified input.

    Args:
        word: The word to correct.
        lang_sample: Source handed to ``parse`` when ``file_format`` is
            ``'txt'``.  It is ignored for ``'bz'``, which always parses
            ``'big.txt'``.
        file_format: Either ``'bz'`` (default) or ``'txt'``.

    Returns:
        The result of ``get_case(word, correction)`` for the candidate
        with the highest count in the parsed count table.

    Raises:
        ValueError: If ``file_format`` is neither ``'bz'`` nor ``'txt'``.
    """
    from autocorrect.nlp_parser import parse
    from autocorrect.word import Word, common, exact, known, get_case

    # NOTE(review): the corpus is re-parsed on each call; callers that
    # correct many words may want to cache this upstream.
    if file_format == 'bz':
        nlp_words, nlp_counts = parse('big.txt', 'bz')
    elif file_format == 'txt':
        nlp_words, nlp_counts = parse(lang_sample, 'txt')
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on the never-assigned tables.
        raise ValueError(
            "unsupported file_format: %r (expected 'bz' or 'txt')"
            % (file_format,))

    w = Word(word)
    # ``or`` short-circuits, so typos()/double_typos() are only generated
    # when every earlier source came back empty.
    candidates = (common([word], nlp_words) or exact([word]) or
                  known([word]) or known(w.typos()) or
                  common(w.double_typos()) or [word])
    # A bare ``.get`` returns None for a word without a count, and
    # Python 3 refuses to order None against ints (or against None).
    # Defaulting to 0 lets such candidates simply rank lowest.
    correction = max(candidates,
                     key=lambda candidate: nlp_counts.get(candidate, 0))
    return get_case(word, correction)
def spell(word, language='en'):
    """Return the most likely correction for everything up to a double typo.

    ``language`` selects which count table ranks the candidates.
    Candidate sources are tried in a fixed order and the first non-empty
    one is used: the word itself (through ``common``, then ``exact``, then
    ``known``), its single typos filtered by ``known``, its double typos
    filtered by ``common``, and finally the unmodified input.

    Args:
        word: The word to correct.
        language: ``'en'`` (default) ranks with ``NLP_COUNTS``; ``'bn'``
            ranks with ``NLP_COUNTS_BN``.

    Returns:
        The result of ``get_case(word, correction)`` for the candidate
        with the highest count in the selected table.

    Raises:
        ValueError: If ``language`` is neither ``'en'`` nor ``'bn'``.
    """
    # Resolve the table first so an unsupported language fails before any
    # work is done.  Each global is looked up only in its own branch.
    if language == 'en':
        counts = NLP_COUNTS
    elif language == 'bn':
        counts = NLP_COUNTS_BN
    else:
        raise ValueError("This language is not supported")

    # NOTE(review): candidate generation is the same for both languages;
    # only the ranking table differs.
    w = Word(word)
    # ``or`` short-circuits, so typos()/double_typos() are only generated
    # when every earlier source came back empty.
    candidates = (common([word]) or exact([word]) or known([word]) or
                  known(w.typos()) or common(w.double_typos()) or [word])
    # A bare ``.get`` returns None for a word without a count, and
    # Python 3 refuses to order None against ints (or against None).
    # Defaulting to 0 lets such candidates simply rank lowest.
    correction = max(candidates,
                     key=lambda candidate: counts.get(candidate, 0))
    return get_case(word, correction)