Ejemplo n.º 1
0
def autocorrect(misspelled):
    """Return the most likely correction for everything up to a double typo.

    Candidate sets are tried in order and the first non-empty one is used:
    ``common``, ``exact`` and ``known`` applied to the input itself, then
    ``known`` applied to ``Word(misspelled).typos()`` and ``common`` applied
    to ``Word(misspelled).double_typos()``.  If all of them are empty, the
    input itself is the only candidate.

    Args:
        misspelled: The word to correct.

    Returns:
        The result of ``get_case(misspelled, correction)``, where
        ``correction`` is the candidate with the highest ``NLP_COUNTS``
        entry.
    """
    w = Word(misspelled)
    candidates = (common([misspelled]) or exact([misspelled])
                  or known([misspelled]) or known(w.typos())
                  or common(w.double_typos()) or [misspelled])
    # A candidate absent from NLP_COUNTS would make a bare ``.get`` return
    # None, and Python 3 refuses to order None against ints inside max().
    # Rank such candidates as count 0 instead.
    correction = max(candidates,
                     key=lambda candidate: NLP_COUNTS.get(candidate, 0))
    return get_case(misspelled, correction)
Ejemplo n.º 2
0
def spell(word):
    """Return the most likely correction for everything up to a double typo.

    The candidate pool is the first non-empty result of, in order:
    ``common``, ``exact`` and ``known`` on the input itself, ``known`` on
    the word's ``typos()``, and ``common`` on its ``double_typos()``; if
    every one of those is empty the input itself is used.  The pool member
    with the highest ``NLP_COUNTS`` entry is then handed to ``get_case``
    together with the original word.
    """
    def frequency(candidate):
        # Ranking key: the candidate's entry in the counts table.
        return NLP_COUNTS[candidate]

    typo_source = Word(word)

    # Walk the fallback chain, stopping at the first non-empty pool.
    pool = common([word])
    if not pool:
        pool = exact([word])
    if not pool:
        pool = known([word])
    if not pool:
        pool = known(typo_source.typos())
    if not pool:
        pool = common(typo_source.double_typos())
    if not pool:
        pool = [word]

    best = max(pool, key=frequency)
    return get_case(word, best)
Ejemplo n.º 3
0
def spell(word):
    """Return the most likely correction for everything up to a double typo.

    Candidate sets are tried in order and the first non-empty one is used:
    ``common``, ``exact`` and ``known`` applied to the input itself, then
    ``known`` applied to ``Word(word).typos()`` and ``common`` applied to
    ``Word(word).double_typos()``.

    Args:
        word: The word to correct.

    Returns:
        ``'<UNK>'`` when every candidate set is empty; otherwise the result
        of ``get_case(word, correction)``, where ``correction`` is the
        candidate with the highest ``NLP_COUNTS`` entry.
    """
    w = Word(word)
    # Unlike a plain "best effort" corrector there is deliberately no
    # fallback to the input word here: an empty result means "unknown".
    candidates = (
        common([word]) or exact([word]) or known([word]) or known(w.typos())
        or common(w.double_typos())
    )
    if not candidates:
        return '<UNK>'

    # A candidate absent from NLP_COUNTS would make a bare ``.get`` return
    # None, and Python 3 refuses to order None against ints inside max().
    # Rank such candidates as count 0 instead.
    correction = max(candidates,
                     key=lambda candidate: NLP_COUNTS.get(candidate, 0))
    return get_case(word, correction)
Ejemplo n.º 4
0
def spell(word, lang_sample, file_format='bz'):
    """Return the most likely correction for everything up to a double typo.

    Args:
        word: The word to correct.
        lang_sample: Language sample handed to ``parse`` when
            ``file_format`` is ``'txt'``; it is not used for ``'bz'``.
        file_format: ``'bz'`` parses the fixed ``'big.txt'`` sample,
            ``'txt'`` parses ``lang_sample``.

    Returns:
        The result of ``get_case(word, correction)``, where ``correction``
        is the candidate with the highest entry in the parsed counts.

    Raises:
        ValueError: If ``file_format`` is neither ``'bz'`` nor ``'txt'``.
    """
    # Reject unknown formats up front.  Previously execution fell through
    # both branches and failed later with an UnboundLocalError on the
    # never-assigned word/count tables.
    if file_format not in ('bz', 'txt'):
        raise ValueError(
            "Unsupported file_format: {!r} (expected 'bz' or 'txt')".format(
                file_format))

    from autocorrect.nlp_parser import parse
    from autocorrect.word import Word, common, exact, known, get_case

    if file_format == 'bz':
        # NOTE(review): lang_sample is ignored on this path and 'big.txt'
        # is hard-coded -- confirm this is intended.
        nlp_words, nlp_counts = parse('big.txt', 'bz')
    else:
        nlp_words, nlp_counts = parse(lang_sample, 'txt')

    w = Word(word)
    # NOTE(review): common() receives nlp_words here but not in the
    # double-typo call below -- confirm this matches its signature.
    candidates = (common([word], nlp_words) or exact([word]) or known([word])
                  or known(w.typos()) or common(w.double_typos()) or [word])
    # A candidate absent from the counts would make a bare ``.get`` return
    # None, and Python 3 refuses to order None against ints inside max().
    # Rank such candidates as count 0 instead.
    correction = max(candidates,
                     key=lambda candidate: nlp_counts.get(candidate, 0))
    return get_case(word, correction)
Ejemplo n.º 5
0
def spell(word, language='en'):
    """Return the most likely correction for everything up to a double typo.

    Candidate sets are tried in order and the first non-empty one is used:
    ``common``, ``exact`` and ``known`` applied to the input itself, then
    ``known`` applied to ``Word(word).typos()`` and ``common`` applied to
    ``Word(word).double_typos()``.  If all of them are empty, the input
    itself is the only candidate.

    Args:
        word: The word to correct.
        language: Selects the frequency table used for ranking: ``'en'``
            uses ``NLP_COUNTS`` and ``'bn'`` uses ``NLP_COUNTS_BN``.

    Returns:
        The result of ``get_case(word, correction)``, where ``correction``
        is the candidate with the highest entry in the selected table.

    Raises:
        ValueError: If ``language`` is neither ``'en'`` nor ``'bn'``.
    """
    # The two languages differ only in the frequency table, so pick the
    # table once and share the rest.  An if/elif (rather than a dict of
    # both tables) keeps each table from being looked up unless requested.
    if language == 'en':
        counts = NLP_COUNTS
    elif language == 'bn':
        counts = NLP_COUNTS_BN
    else:
        raise ValueError("This language is not supported")

    w = Word(word)
    candidates = (common([word]) or exact([word]) or known([word])
                  or known(w.typos()) or common(w.double_typos())
                  or [word])
    # A candidate absent from the table would make a bare ``.get`` return
    # None, and Python 3 refuses to order None against ints inside max().
    # Rank such candidates as count 0 instead.
    correction = max(candidates,
                     key=lambda candidate: counts.get(candidate, 0))
    return get_case(word, correction)