class SpellCheckModel:
    """Wrapper that owns a ``Corrector`` and exposes one correction call.

    By default the language model is loaded from the ``data`` directory that
    sits next to this module; callers may point it at another model file.
    """

    # File name of the language model looked up in the local ``data`` folder.
    _DEFAULT_LM_FILENAME = 'zh_giga.no_cna_cmn.prune01244.klm'

    def __init__(self, language_model_path=None):
        """Build the underlying ``Corrector``.

        Args:
            language_model_path: Optional path handed to ``Corrector`` as its
                ``language_model_path``. When omitted, the model file in the
                ``data`` directory beside this module is used, which is the
                behaviour of the original zero-argument constructor.
        """
        if language_model_path is None:
            language_model_path = os.path.join(
                os.path.dirname(__file__), 'data', self._DEFAULT_LM_FILENAME)
        self.model = Corrector(language_model_path=language_model_path)

    def correct_cn_words(self, words):
        """Run ``words`` through the corrector and return the corrected text.

        The input, the corrected text and the detail object returned by
        ``self.model.correct`` are printed to stdout on every call.

        Args:
            words: Text passed unchanged to ``self.model.correct``.

        Returns:
            The first element of the pair returned by ``self.model.correct``.
        """
        corrected_sent, detail = self.model.correct(words)
        print('ori words:{0}; corrected words:{1}; detail:{2}'.format(words, corrected_sent, detail))
        return corrected_sent
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Build a Corrector from the resource paths exported by
pycorrector.config and print the result of correcting sample sentences.
"""
from pycorrector import Corrector
from pycorrector.config import (
    common_char_path,
    custom_confusion_path,
    language_model_path,
    same_pinyin_path,
    same_stroke_path,
    word_freq_path,
)

# Using the trigram language model (people_chars.klm) gives better correction
# results:
# language_model_path = '../pycorrector/data/kenlm/people_chars.klm'

# Every resource path is passed explicitly by keyword.
corrector_kwargs = {
    'common_char_path': common_char_path,
    'same_pinyin_path': same_pinyin_path,
    'same_stroke_path': same_stroke_path,
    'language_model_path': language_model_path,
    'word_freq_path': word_freq_path,
    'custom_confusion_path': custom_confusion_path,
}
model = Corrector(**corrector_kwargs)

# Sample sentences fed to the corrector.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]

report_template = "original sentence:{} => correct sentence:{}"
for sentence in error_sentences:
    # The whole return value of correct() is formatted into the report as-is.
    result = model.correct(sentence)
    print(report_template.format(sentence, result))
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Correct one sample sentence with a Corrector that loads the
people2014corpus_chars.klm language model from ../pycorrector/data.
"""
import os
import sys

# Directory that contains this script; every relative path below hangs off it.
pwd_path = os.path.abspath(os.path.dirname(__file__))

# Anchor the extra import path to this file rather than to the current working
# directory, so the import below resolves the same way the model path does,
# no matter where the script is launched from.
sys.path.append(os.path.join(pwd_path, '..'))

from pycorrector import Corrector  # noqa: E402 - needs the sys.path entry above

if __name__ == '__main__':
    lm_path = os.path.join(pwd_path, '../pycorrector/data/people2014corpus_chars.klm')
    model = Corrector(language_model_path=lm_path)
    # correct() is unpacked into the corrected sentence and a detail object.
    corrected_sent, detail = model.correct('少先队员因该为老人让坐')
    print(corrected_sent, detail)
# -*- coding: utf-8 -*-
# @Time : 2020/7/28 18:31
# @Author : QUENTINNE
# @File : main.py
# @Software: PyCharm
"""Correct one sample sentence using the language model stored beside this script."""
import os

from pycorrector import Corrector

# Model file name, joined onto this script's directory below.
LM_RELATIVE_PATH = './people_chars_lm.klm'
# Sentence handed to the corrector.
SAMPLE_SENTENCE = '效国不是特别好'

# Absolute directory of this script.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, LM_RELATIVE_PATH)

model = Corrector(language_model_path=lm_path)

# correct() is unpacked into the corrected sentence and a detail object;
# both are printed on one line.
corrected_sent, detail = model.correct(SAMPLE_SENTENCE)
print(corrected_sent, detail)
# coding:utf-8
"""Correct one sample sentence using the lm_4gram_wxjs.arpa model beside this script."""
import os

from pycorrector import Corrector

# Model file name, joined onto this script's directory below.
LM_RELATIVE_PATH = './lm_4gram_wxjs.arpa'
# Sentence handed to the corrector.
SAMPLE_SENTENCE = '民警遂依法对被告人王昇辉血液进行采集'

# Absolute directory of this script.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, LM_RELATIVE_PATH)

model = Corrector(language_model_path=lm_path)

# correct() is unpacked into the corrected sentence and a detail object;
# both are printed on one line.
corrected_sent, detail = model.correct(SAMPLE_SENTENCE)
print(corrected_sent, detail)