class SpellCheckModel():
    """Wrap a pycorrector ``Corrector`` that is built with a bundled language model file."""

    def __init__(self):
        """Create the corrector using the ``.klm`` file in the ``data`` directory next to this module."""
        klm_file = os.path.join(
            os.path.dirname(__file__),
            'data',
            'zh_giga.no_cna_cmn.prune01244.klm',
        )
        self.model = Corrector(language_model_path=klm_file)

    def correct_cn_words(self, words):
        """Run the corrector on ``words`` and return the corrected text.

        :param words: text handed to ``self.model.correct``
        :return: the first element of the pair returned by ``self.model.correct``
        """
        outcome = self.model.correct(words)
        # The corrector result is unpacked as (corrected text, correction details).
        corrected_sent, detail = outcome
        # The original, corrected text and details are echoed to stdout on every call.
        message = 'ori words:{0}; corrected words:{1}; detail:{2}'.format(
            words, corrected_sent, detail)
        print(message)
        return corrected_sent
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Build a Corrector from the paths exposed by pycorrector.config
and print the correction result for a few sample sentences.
"""

from pycorrector import Corrector
from pycorrector.config import (
    common_char_path,
    custom_confusion_path,
    language_model_path,
    same_pinyin_path,
    same_stroke_path,
    word_freq_path,
)

# A trigram language model (people_chars.klm) gives better correction results:
# language_model_path = '../pycorrector/data/kenlm/people_chars.klm'
model = Corrector(
    common_char_path=common_char_path,
    same_pinyin_path=same_pinyin_path,
    same_stroke_path=same_stroke_path,
    language_model_path=language_model_path,
    word_freq_path=word_freq_path,
    custom_confusion_path=custom_confusion_path,
)

error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]

# Whatever model.correct() returns is formatted as-is into the report line.
report_template = "original sentence:{} => correct sentence:{}"
for sentence in error_sentences:
    print(report_template.format(sentence, model.correct(sentence)))
def __init__(self):
    """Create ``self.model`` from the ``.klm`` file in the ``data`` directory next to this module."""
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    klm_file = os.path.join(data_dir, 'zh_giga.no_cna_cmn.prune01244.klm')
    self.model = Corrector(language_model_path=klm_file)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Correct a list of sample sentences with a default-constructed
Corrector and print each result.
"""

import os
import sys

# Extend the import search path with the parent directory before importing pycorrector.
sys.path.append("../")
from pycorrector import Corrector

pwd_path = os.path.abspath(os.path.dirname(__file__))

model = Corrector()

error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
    '我的形像代言人',
    '我的形像坏人吗',
    '这么做为了谁?',
    '做为一个男人'
]

# Whatever model.correct() returns is formatted as-is into the report line.
report_template = "original sentence:{} => correct sentence:{}"
for sentence in error_sentences:
    print(report_template.format(sentence, model.correct(sentence)))
# -*- coding: utf-8 -*-
# @Time : 2020/7/28 18:31
# @Author : QUENTINNE
# @File : main.py
# @Software: PyCharm
"""Correct one sample sentence using a language model file stored next to this script."""

import os

from pycorrector import Corrector

# Directory containing this script; the model file is looked up relative to it.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './people_chars_lm.klm')

model = Corrector(language_model_path=lm_path)

# The result is unpacked as (corrected sentence, correction details).
result = model.correct('效国不是特别好')
corrected_sent, detail = result
print(corrected_sent, detail)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Correct sample sentences, switching to the people_chars_lm.klm
language model when that file exists on disk.
"""

import os
import sys

# Extend the import search path with the parent directory before importing pycorrector.
sys.path.append("../")
from pycorrector import Corrector

pwd_path = os.path.abspath(os.path.dirname(__file__))

# A trigram language model (people_chars.klm) gives better correction results:
new_language_model_path = os.path.join(
    pwd_path, '../pycorrector/data/kenlm/people_chars_lm.klm')

model = Corrector()
# Only override the language model when the alternative file is actually present.
has_new_model = os.path.exists(new_language_model_path)
if has_new_model:
    model.set_language_model_path(new_language_model_path)

error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]

# Whatever model.correct() returns is formatted as-is into the report line.
report_template = "original sentence:{} => correct sentence:{}"
for sentence in error_sentences:
    print(report_template.format(sentence, model.correct(sentence)))
""" 该模块对query进行纠错 """ from .initial import console, emoji from pycorrector import Corrector #kenlm安装失败使用:pip install https://github.com/kpu/kenlm/archive/master.zip path = ('./data/people_chars_lm.klm') model = Corrector(language_model_path=path) def correct(sentence): """句子纠错 :param sentence:待纠错原句 :return result: 纠错完成后的句子 """ result, detail = model.correct(sentence) # print(result, detail) if sentence: console.print(emoji.emojize(':point_right:'),f"[bold gray]{result}[/bold gray]") return result
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: When run as a script, correct one sample sentence with the
people2014corpus_chars.klm language model.
"""

import os
import sys

# Extend the import search path with the parent directory before importing pycorrector.
sys.path.append("../")
from pycorrector import Corrector

pwd_path = os.path.abspath(os.path.dirname(__file__))


def main():
    """Build the corrector, correct the sample sentence and print the result."""
    lm_path = os.path.join(pwd_path, '../pycorrector/data/people2014corpus_chars.klm')
    model = Corrector(language_model_path=lm_path)
    # The result is unpacked as (corrected sentence, correction details).
    corrected_sent, detail = model.correct('少先队员因该为老人让坐')
    print(corrected_sent, detail)


if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Correct a list of sample sentences using the
people_chars_lm.klm language model and print each result.
"""

import os
import sys

# Extend the import search path with the parent directory before importing pycorrector.
sys.path.append("../")
from pycorrector import Corrector

pwd_path = os.path.abspath(os.path.dirname(__file__))

# The model path is built by plain string concatenation onto this script's directory.
model = Corrector(
    language_model_path=pwd_path + '/../pycorrector/data/kenlm/people_chars_lm.klm'
)

error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
    '我的形像代言人',
    '我的形像坏人吗',
    '这么做为了谁?',
    '做为一个男人'
]

# Whatever model.correct() returns is formatted as-is into the report line.
report_template = "original sentence:{} => correct sentence:{}"
for sentence in error_sentences:
    print(report_template.format(sentence, model.correct(sentence)))
# coding:utf-8
"""Correct one sample sentence using an ARPA language model file stored next to this script."""

import os

from pycorrector import Corrector

# Directory containing this script; the model file is looked up relative to it.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './lm_4gram_wxjs.arpa')

model = Corrector(language_model_path=lm_path)

# The result is unpacked as (corrected sentence, correction details).
result = model.correct('民警遂依法对被告人王昇辉血液进行采集')
corrected_sent, detail = result
print(corrected_sent, detail)