Ejemplo n.º 1
0
def edits1(word):
    """Return the set of strings that are one simple edit away from `word`.

    For every way of cutting `word` into a head and a tail, the following
    variants are collected:

    * deletion      -- drop the first character of the tail
    * transposition -- swap the first two characters of the tail
    * replacement   -- substitute the first character of the tail with each
                       symbol of `alphabet`
    * insertion     -- put each symbol of `alphabet` between head and tail
    """
    variants = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        if tail:
            # Deletion needs at least one character after the cut.
            variants.add(head + tail[1:])
        if len(tail) > 1:
            # Transposition needs two characters after the cut.
            variants.add(head + tail[1] + tail[0] + tail[2:])
        for symbol in alphabet:
            if tail:
                variants.add(head + symbol + tail[1:])
            # Insertion is possible at every cut, including the very end.
            variants.add(head + symbol + tail)
    return variants


def datafile(name, sep='\t'):
    """Lazily read a delimited text file, yielding one list of fields per line.

    Args:
        name: Path of the file to read.
        sep: Field separator handed to ``str.split`` (a tab by default).

    Yields:
        The list produced by ``line.split(sep)`` for each line. Lines are not
        stripped, so the last field keeps its line terminator.
    """
    # `open` exists on both Python 2 and Python 3 (the old `file` builtin is
    # Python 2 only), and the `with` block closes the handle as soon as the
    # generator is exhausted or closed.
    with open(name) as handle:
        for line in handle:
            yield line.split(sep)


# Edit frequency table, built as soon as this module is executed from the
# frequency file 'Norvig/edits/count_1edit.txt'.
edit_frequencies = WordFrequency.from_freq_file('Norvig/edits/count_1edit.txt')


# Probability used by p_edit() for "an edit was made" (1 in 20); the empty
# edit receives the remaining 1 - p_spell_error.
p_spell_error = 1./20.


def p_edit(edit):
    """Return the probability assigned to `edit`.

    `edit` is either '' (no edit) or an edit string such as 'a|b'. The empty
    edit gets ``1 - p_spell_error``; any other edit gets ``p_spell_error``
    scaled by ``edit_frequencies.get_probability(edit)``.
    """
    is_empty_edit = edit == ''
    if not is_empty_edit:
        return p_spell_error * edit_frequencies.get_probability(edit)
    return 1. - p_spell_error

# Every leading slice of every entry of Pw, from '' up to the full entry.
PREFIXES = {entry[:end] for entry in Pw for end in range(len(entry) + 1)}


def edits(word, dictionary, d=2):
Ejemplo n.º 2
0
from Context import Context
from EditDistance import edits
from WordFrequency import WordFrequency
import re
import Readers

# Frequency tables built as soon as this module is executed, one per data file.
# NOTE(review): the file names suggest single-word and two-word counts
# respectively -- confirm against the data files.
count_1w = WordFrequency.from_freq_file("data/Norvig/wordfreqs/count_1w.txt")
count_2w = WordFrequency.from_freq_file("data/Norvig/wordfreqs/count_2w.txt")



def corrections(text):
    """Return `text` with every run of ASCII letters passed through correct().

    Each match of '[a-zA-Z]+' is handed to correct() as a plain string and
    replaced by whatever correct() returns; all other characters are kept
    as they are.
    """
    def _replacement(match):
        # group(0) is the whole matched run of letters.
        return correct(match.group(0))

    return re.sub('[a-zA-Z]+', _replacement, text)

def correct(context):
    """Look up the edit candidates for the word held by `context`.

    Calls ``edits(context.word())`` and takes ``.items()`` of the result.
    The step that would choose among the candidates is commented out below,
    so the function currently falls off the end and returns None.

    NOTE(review): the previous docstring promised "the word that is the most
    likely spell correction"; presumably that is the intended contract once
    the selection step is restored -- confirm before relying on a return
    value.
    """
    candidates = edits(context.word()).items()

    # Disabled selection step: pick the candidate maximising Pedit(e) * Pw(c).
    # NOTE(review): `lambda (c,e): ...` is Python 2 tuple-parameter syntax
    # (a syntax error on Python 3), and Pedit / Pw are not defined in the
    # visible part of this file -- confirm before re-enabling.
    #c, edit = max(candidates, key=lambda (c,e): Pedit(e) * Pw(c))
    #return c


# Script driver, executed whenever this module is run or imported: whatever
# Readers.gen_words_from_file("doc.txt") yields is fed through
# Context.gen_context_sequence_from_word_sequence, and correct() is called on
# each resulting context. The value returned by correct() is discarded.
for context in Context.gen_context_sequence_from_word_sequence(Readers.gen_words_from_file("doc.txt")):
    correct(context)