コード例 #1
0
ファイル: TrSpellChecker.py プロジェクト: burakyldrm/calisma
 def __init__(self):
     """Load the word table for this instance.

     Passes ``self.pWORDSPATH`` to ``dump_dict.getWORDS`` and keeps the
     returned object on ``self.WORDS``.
     """
     # NOTE(review): pWORDSPATH is not assigned in this method; presumably
     # it is defined at class level — confirm against the class body.
     words_path = self.pWORDSPATH
     self.WORDS = dump_dict.getWORDS(words_path)
コード例 #2
0
from collections import Counter

TEXTSPATH = "/home/dicle/Documents/data/tr/tr_gazete_siir/tr_text_compilation.txt"
#def words(text): return re.findall(r'\w+', text.lower())
def words(text): return re.findall(r'\w+', text)
#def words(text): return text.split()
WORDS = Counter(words(open(TEXTSPATH).read()))
'''

import re
import string
from time import time

from language_tools.spellchecker import dump_dict

# Word table loaded once, at import time, through the project's dump_dict
# helper. P() below calls WORDS.values() and indexes WORDS[word], so this is
# presumably a mapping of word -> occurrence count (e.g. a Counter) —
# TODO confirm against dump_dict.getWORDS.
WORDS = dump_dict.getWORDS()


def P(word, N=sum(WORDS.values())):
    """Return the share of `word` in the WORDS table.

    The result is ``WORDS[word]`` divided by `N`. `N` defaults to the sum
    of all values in WORDS; that default is computed once, when this
    function is defined, rather than on every call.
    """
    occurrences = WORDS[word]
    return occurrences / N


def correction(word):
    """Pick the candidate for `word` that scores highest under P.

    Candidates come from ``candidates(word)``; the one with the largest
    ``P`` value is returned.
    """
    options = candidates(word)
    return max(options, key=P)


def candidates(word):
    "Generate possible spelling corrections for word."
    return (known([word]) or known(edits1(word)) or known(edits2(word))