# Example no. 1
# 0
class SpellCheckModel:
    """Wrapper that owns a ``Corrector`` and exposes a single correction call."""

    def __init__(self, language_model_path=None):
        """Create the underlying ``Corrector``.

        Args:
            language_model_path: Path of the language model file passed to
                ``Corrector``. When left as ``None`` the file
                ``data/zh_giga.no_cna_cmn.prune01244.klm`` located next to
                this module is used, which is the path that was previously
                hard-coded here.
        """
        if language_model_path is None:
            language_model_path = os.path.join(
                os.path.dirname(__file__),
                'data',
                'zh_giga.no_cna_cmn.prune01244.klm',
            )
        self.model = Corrector(language_model_path=language_model_path)

    def correct_cn_words(self, words):
        """Run the corrector on ``words`` and return the corrected text.

        The result of ``self.model.correct`` is unpacked into the corrected
        text and a detail value; both are printed together with the input,
        and only the corrected text is returned.

        Args:
            words: The text to correct.

        Returns:
            The first element of the pair returned by ``self.model.correct``.
        """
        corrected_sent, detail = self.model.correct(words)
        print('ori words:{0}; corrected words:{1}; detail:{2}'.format(words, corrected_sent, detail))
        return corrected_sent
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
from pycorrector import Corrector

from pycorrector.config import common_char_path, same_pinyin_path, \
    same_stroke_path, language_model_path, \
    word_freq_path, \
    custom_confusion_path

# A trigram language model (people_chars.klm) gives better correction
# results; to use it, override the imported path like this:
# language_model_path = '../pycorrector/data/kenlm/people_chars.klm'

# Build the corrector with every resource path taken from pycorrector.config.
model = Corrector(
    language_model_path=language_model_path,
    common_char_path=common_char_path,
    same_pinyin_path=same_pinyin_path,
    same_stroke_path=same_stroke_path,
    word_freq_path=word_freq_path,
    custom_confusion_path=custom_confusion_path,
)

# Sample inputs for the demo.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]

for line in error_sentences:
    # The value returned by correct() is formatted into the message as-is.
    correct_sent = model.correct(line)
    print("original sentence:{} => correct sentence:{}".format(line, correct_sent))
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import os
import sys

sys.path.append("../")
from pycorrector import Corrector

# Absolute directory containing this script; the model path below is built
# from it rather than from the current working directory.
pwd_path = os.path.abspath(os.path.dirname(__file__))

if __name__ == '__main__':
    # Demo: load a specific language model and correct one sentence.
    sample = '少先队员因该为老人让坐'
    model_file = os.path.join(
        pwd_path, '../pycorrector/data/people2014corpus_chars.klm')
    corrector = Corrector(language_model_path=model_file)

    # correct() yields a pair: the corrected sentence and the detail value.
    fixed, info = corrector.correct(sample)
    print(fixed, info)
# Example no. 4
# 0
# -*- coding: utf-8 -*-
# @Time    : 2020/7/28 18:31
# @Author  : QUENTINNE
# @File    : main.py
# @Software: PyCharm

from pycorrector import Corrector
import os

# Resolve the language model relative to this file's directory.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './people_chars_lm.klm')

# Corrector backed by the model file located above.
model = Corrector(language_model_path=lm_path)

# Correct one sample sentence and print the corrected text with its detail.
sentence = '效国不是特别好'
corrected_sent, detail = model.correct(sentence)
print(corrected_sent, detail)
# Example no. 5
# 0
# coding:utf-8
import os
from pycorrector import Corrector

# Directory of this file; the .arpa model is looked up inside it.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './lm_4gram_wxjs.arpa')

# Corrector backed by the model file located above.
model = Corrector(language_model_path=lm_path)

# Correct one sample sentence and print the corrected text with its detail.
sentence = '民警遂依法对被告人王昇辉血液进行采集'
corrected_sent, detail = model.correct(sentence)
print(corrected_sent, detail)