def __init__(self):
    """Populate ``self.WORDS`` from the project's ``dump_dict`` helper.

    Calls ``dump_dict.getWORDS`` with ``self.pWORDSPATH`` and stores the
    result on the instance.
    """
    # NOTE(review): pWORDSPATH is not assigned here; presumably it is a
    # class-level attribute holding the word-dump path -- confirm on the class.
    load_words = dump_dict.getWORDS
    self.WORDS = load_words(self.pWORDSPATH)
from collections import Counter TEXTSPATH = "/home/dicle/Documents/data/tr/tr_gazete_siir/tr_text_compilation.txt" #def words(text): return re.findall(r'\w+', text.lower()) def words(text): return re.findall(r'\w+', text) #def words(text): return text.split() WORDS = Counter(words(open(TEXTSPATH).read())) ''' import re import string from time import time from language_tools.spellchecker import dump_dict WORDS = dump_dict.getWORDS() def P(word, N=sum(WORDS.values())): "Probability of `word`." return WORDS[word] / N def correction(word): "Most probable spelling correction for word." return max(candidates(word), key=P) def candidates(word): "Generate possible spelling corrections for word." return (known([word]) or known(edits1(word)) or known(edits2(word))