def spell(word):
    """Return the most likely correction for everything up to a double typo.

    Candidate sets are tried in order and the first non-empty one wins:
    ``common([word])``, ``exact([word])``, ``known([word])``,
    ``known(w.typos())``, ``common(w.double_typos())`` and finally the
    input word itself.  The candidate with the highest ``NLP_COUNTS`` entry
    is selected and passed through ``get_case`` together with the input.

    Args:
        word: the (possibly misspelled) word to correct.

    Returns:
        The result of ``get_case(word, correction)``.
    """
    w = Word(word)
    candidates = (
        common([word])
        or exact([word])
        or known([word])
        or known(w.typos())
        or common(w.double_typos())
        or [word]
    )
    # Use .get(..., 0) rather than subscripting: the `[word]` fallback may
    # hold a word that has no count, and a lookup must neither raise
    # KeyError (plain dict) nor insert a new key (defaultdict).
    correction = max(candidates, key=lambda item: NLP_COUNTS.get(item, 0))
    return get_case(word, correction)
def autocorrect(misspelled):
    """Pick the best correction, covering everything up to a double typo.

    Candidate sources are consulted one after another and the first
    non-empty result is used; if every source comes back empty the input
    itself is the only candidate.  The winner is the candidate with the
    largest ``NLP_COUNTS.get`` value, returned via ``get_case``.
    """
    variants = Word(misspelled)

    # Each fallback is evaluated only when the previous one was empty.
    pool = common([misspelled])
    if not pool:
        pool = exact([misspelled])
    if not pool:
        pool = known([misspelled])
    if not pool:
        pool = known(variants.typos())
    if not pool:
        pool = common(variants.double_typos())
    if not pool:
        pool = [misspelled]

    best = max(pool, key=NLP_COUNTS.get)
    return get_case(misspelled, best)
def spell(word):
    """Return the most likely correction for everything up to a double typo.

    Candidate sets are tried in order and the first non-empty one wins:
    ``common([word])``, ``exact([word])``, ``known([word])``,
    ``known(w.typos())`` and ``common(w.double_typos())``.

    Args:
        word: the (possibly misspelled) word to correct.

    Returns:
        ``get_case(word, correction)`` for the candidate with the highest
        ``NLP_COUNTS.get`` value, or the literal string ``'<UNK>'`` when no
        candidate set produced anything.
    """
    w = Word(word)
    candidates = (
        common([word])
        or exact([word])
        or known([word])
        or known(w.typos())
        or common(w.double_typos())
    )
    if not candidates:
        # Deliberately no `[word]` fallback: an uncorrectable word yields the
        # sentinel instead of being echoed back.  The sentinel is returned
        # as-is and never passed through get_case, so callers can compare
        # against '<UNK>' regardless of the input's capitalisation.
        return '<UNK>'

    correction = max(candidates, key=NLP_COUNTS.get)
    return get_case(word, correction)
def spell(word, lang_sample, file_format='bz'):
    """Return the most likely correction for everything up to a double typo.

    Args:
        word: the (possibly misspelled) word to correct.
        lang_sample: corpus handed to ``parse`` when ``file_format`` is
            ``'txt'``.  Ignored for ``'bz'``, which always parses
            ``'big.txt'``.
        file_format: ``'bz'`` (default) or ``'txt'``.

    Returns:
        The result of ``get_case(word, correction)`` for the candidate with
        the highest count in the parsed corpus.

    Raises:
        ValueError: if ``file_format`` is neither ``'bz'`` nor ``'txt'``.
    """
    # Validate first: previously an unknown format fell through both
    # branches and failed later with an unbound-local error.
    if file_format not in ('bz', 'txt'):
        raise ValueError(
            "Unsupported file_format: {!r} (expected 'bz' or 'txt')".format(
                file_format))

    from autocorrect.nlp_parser import parse
    from autocorrect.word import Word, common, exact, known, get_case

    # NOTE(review): the corpus is re-parsed on every call; consider caching
    # at the call site if this is used in a loop.
    if file_format == 'bz':
        nlp_words, nlp_counts = parse('big.txt', 'bz')
    else:
        nlp_words, nlp_counts = parse(lang_sample, 'txt')

    w = Word(word)
    # NOTE(review): only the first common() call receives the parsed word
    # set; the remaining helpers are called exactly as in the original.
    # Confirm against their signatures whether they should receive it too.
    candidates = (
        common([word], nlp_words)
        or exact([word])
        or known([word])
        or known(w.typos())
        or common(w.double_typos())
        or [word]
    )
    correction = max(candidates, key=nlp_counts.get)
    return get_case(word, correction)
def spell(word, language='en'):
    """Return the most likely correction for everything up to a double typo.

    Candidate generation is the same for every supported language; only the
    frequency table used to rank the candidates differs.

    Args:
        word: the (possibly misspelled) word to correct.
        language: ``'en'`` ranks candidates with ``NLP_COUNTS``; ``'bn'``
            ranks them with ``NLP_COUNTS_BN``.

    Returns:
        The result of ``get_case(word, correction)``.

    Raises:
        ValueError: if ``language`` is not ``'en'`` or ``'bn'``.
    """
    if language not in ('en', 'bn'):
        raise ValueError("This language is not supported")

    w = Word(word)
    candidates = (
        common([word])
        or exact([word])
        or known([word])
        or known(w.typos())
        or common(w.double_typos())
        or [word]
    )
    # Resolve the table lazily so the one that is not needed is never
    # looked up, matching the original per-branch behaviour.
    counts = NLP_COUNTS if language == 'en' else NLP_COUNTS_BN
    correction = max(candidates, key=counts.get)
    return get_case(word, correction)
def spelltest(tests, verbose=False):
    """Run ``spell`` over a table of misspellings and summarise the outcome.

    Args:
        tests: mapping of the expected word to a whitespace-separated string
            of misspellings that should all correct to it.
        verbose: when true, print one ``MSG`` line for every misspelling
            that was not corrected to its target.

    Returns:
        ``RESULT`` formatted with: number of wrong corrections, number of
        misspellings tried, integer percentage corrected, number of wrong
        corrections whose target fails ``known([target])``, and elapsed
        whole seconds.
    """
    n, bad, unknown = 0, 0, 0
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    for target, incorrect_spellings in tests.items():
        for incorrect_spelling in incorrect_spellings.split():
            n += 1
            w = spell(incorrect_spelling)
            if w == target:
                continue
            bad += 1
            if not known([target]):
                unknown += 1
            if verbose:
                # .get(..., 0): the target (or the guess) may have no count,
                # which is exactly the case counted in `unknown` above.
                print(MSG.format(incorrect_spelling, w, NLP_COUNTS.get(w, 0),
                                 target, NLP_COUNTS.get(target, 0)))
    # An empty table has no failures; avoid dividing by zero.
    pct = int(100. - 100. * bad / n) if n else 100
    return RESULT.format(bad, n, pct, unknown,
                         int(time.perf_counter() - start))
def spelltest(tests, verbose=False):
    """Run ``spell`` over a table of misspellings and summarise the outcome.

    Args:
        tests: mapping of the expected word to a whitespace-separated string
            of misspellings that should all correct to it.
        verbose: when true, print one ``MSG`` line for every misspelling
            that was not corrected to its target.

    Returns:
        ``RESULT`` formatted with: number of wrong corrections, number of
        misspellings tried, integer percentage corrected, number of wrong
        corrections whose target fails ``known([target])``, and elapsed
        seconds as a float (not truncated).
    """
    n, bad, unknown = 0, 0, 0
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    for target, incorrect_spellings in tests.items():
        for incorrect_spelling in incorrect_spellings.split():
            n += 1
            w = spell(incorrect_spelling)
            if w == target:
                continue
            bad += 1
            if not known([target]):
                unknown += 1
            if verbose:
                # .get(..., 0): the target (or the guess) may have no count,
                # which is exactly the case counted in `unknown` above.
                print(MSG.format(incorrect_spelling, w, NLP_COUNTS.get(w, 0),
                                 target, NLP_COUNTS.get(target, 0)))
    # An empty table has no failures; avoid dividing by zero.
    pct = int(100. - 100. * bad / n) if n else 100
    return RESULT.format(bad, n, pct, unknown,
                         (time.perf_counter() - start))