Esempio n. 1
0
class SpellCheckModel:
    """Thin wrapper around a ``Corrector`` configured with a bundled language model."""

    def __init__(self):
        """Create the corrector from the ``.klm`` file in this module's ``data`` directory."""
        data_dir = os.path.join(os.path.dirname(__file__), 'data')
        lm_file = os.path.join(data_dir, 'zh_giga.no_cna_cmn.prune01244.klm')
        self.model = Corrector(language_model_path=lm_file)

    def correct_cn_words(self, words):
        """Correct ``words`` with the wrapped model and return the corrected text.

        The input, the corrected text and the detail value returned by
        ``self.model.correct`` are printed to stdout on every call.

        :param words: text handed to ``self.model.correct``
        :return: first element of the pair returned by ``self.model.correct``
        """
        fixed_text, changes = self.model.correct(words)
        report = 'ori words:{0}; corrected words:{1}; detail:{2}'.format(words, fixed_text, changes)
        print(report)
        return fixed_text
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
from pycorrector import Corrector

from pycorrector.config import common_char_path, same_pinyin_path, \
    same_stroke_path, language_model_path, \
    word_freq_path, \
    custom_confusion_path

# Using the trigram language model (people_chars.klm) gives better correction results:
# language_model_path = '../pycorrector/data/kenlm/people_chars.klm'

# Build the corrector with every resource path taken from pycorrector.config.
model = Corrector(
    common_char_path=common_char_path,
    same_pinyin_path=same_pinyin_path,
    same_stroke_path=same_stroke_path,
    language_model_path=language_model_path,
    word_freq_path=word_freq_path,
    custom_confusion_path=custom_confusion_path,
)

# Sample sentences that are run through the corrector.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]
for sentence in error_sentences:
    # Print the input next to whatever model.correct() returns for it.
    print("original sentence:{} => correct sentence:{}".format(sentence, model.correct(sentence)))
Esempio n. 3
0
 def __init__(self):
     """Create the corrector from the ``.klm`` file in this module's ``data`` directory."""
     data_dir = os.path.join(os.path.dirname(__file__), 'data')
     lm_file = os.path.join(data_dir, 'zh_giga.no_cna_cmn.prune01244.klm')
     self.model = Corrector(language_model_path=lm_file)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import sys

sys.path.append("../")
from pycorrector import Corrector
import os

# Absolute directory of this script.
pwd_path = os.path.abspath(os.path.dirname(__file__))

# Corrector built with its default configuration.
model = Corrector()

# Sample sentences that are run through the corrector.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
    '我的形像代言人',
    '我的形像坏人吗',
    '这么做为了谁?',
    '做为一个男人',
]
for sentence in error_sentences:
    # Print the input next to whatever model.correct() returns for it.
    print("original sentence:{} => correct sentence:{}".format(sentence, model.correct(sentence)))
Esempio n. 5
0
# -*- coding: utf-8 -*-
# @Time    : 2020/7/28 18:31
# @Author  : QUENTINNE
# @File    : main.py
# @Software: PyCharm

from pycorrector import Corrector
import os

# The language model path is built from this script's directory, not the cwd.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './people_chars_lm.klm')

model = Corrector(language_model_path=lm_path)

# correct() is unpacked as a (corrected sentence, detail) pair and both are printed.
outcome = model.correct('效国不是特别好')
corrected_sent, detail = outcome
print(corrected_sent, detail)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import sys

sys.path.append("../")
from pycorrector import Corrector
import os
pwd_path = os.path.abspath(os.path.dirname(__file__))

# Using the trigram language model (people_chars.klm) gives better correction results:
new_language_model_path = os.path.join(pwd_path, '../pycorrector/data/kenlm/people_chars_lm.klm')

# Start from the default corrector and switch models only when the file is present.
model = Corrector()
if os.path.exists(new_language_model_path):
    model.set_language_model_path(new_language_model_path)

# Sample sentences that are run through the corrector.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
]
for sentence in error_sentences:
    # Print the input next to whatever model.correct() returns for it.
    print("original sentence:{} => correct sentence:{}".format(sentence, model.correct(sentence)))
Esempio n. 7
0
"""
该模块对query进行纠错
"""
from .initial import console, emoji
from pycorrector import Corrector #kenlm安装失败使用:pip install https://github.com/kpu/kenlm/archive/master.zip

# Relative location of the language model file.
# NOTE(review): presumably resolved against the current working directory - confirm.
path = './data/people_chars_lm.klm'

# Module-level corrector shared by the functions in this module.
model = Corrector(language_model_path=path)

def correct(sentence):
    """Correct a sentence with the module-level model and echo the result.

    :param sentence: original sentence to be corrected
    :return result: the corrected sentence (first element returned by ``model.correct``)
    """
    result, _detail = model.correct(sentence)
    if not sentence:
        # Nothing is echoed for an empty input, but the result is still returned.
        return result
    pointer = emoji.emojize(':point_right:')
    console.print(pointer, f"[bold gray]{result}[/bold gray]")
    return result
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import os
import sys

sys.path.append("../")
from pycorrector import Corrector

# Absolute directory of this script; the language model path is built from it.
pwd_path = os.path.abspath(os.path.dirname(__file__))

if __name__ == '__main__':
    # Only runs when executed as a script, so importing this module stays cheap.
    lm_path = os.path.join(pwd_path, '../pycorrector/data/people2014corpus_chars.klm')
    model = Corrector(language_model_path=lm_path)

    # correct() is unpacked as a (corrected sentence, detail) pair and both are printed.
    outcome = model.correct('少先队员因该为老人让坐')
    corrected_sent, detail = outcome
    print(corrected_sent, detail)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""
import os
import sys

sys.path.append("../")
from pycorrector import Corrector

# Absolute directory of this script; the language model path is built from it.
pwd_path = os.path.abspath(os.path.dirname(__file__))

# Build the model path with os.path.join instead of string concatenation so the
# separator handling is left to the standard library.
lm_path = os.path.join(pwd_path, '../pycorrector/data/kenlm/people_chars_lm.klm')
model = Corrector(language_model_path=lm_path)

# Sample sentences that are run through the corrector.
error_sentences = [
    '少先队员因该为老人让坐',
    '天地无垠大,我们的舞台无线大',
    '我的形像代言人',
    '我的形像坏人吗',
    '这么做为了谁?',
    '做为一个男人',
]
for line in error_sentences:
    # Print the input next to whatever model.correct() returns for it.
    correct_sent = model.correct(line)
    print("original sentence:{} => correct sentence:{}".format(line, correct_sent))
Esempio n. 10
0
# coding:utf-8
import os
from pycorrector import Corrector

# The language model path is built from this script's directory, not the cwd.
pwd_path = os.path.abspath(os.path.dirname(__file__))
lm_path = os.path.join(pwd_path, './lm_4gram_wxjs.arpa')

model = Corrector(language_model_path=lm_path)

# correct() is unpacked as a (corrected sentence, detail) pair and both are printed.
outcome = model.correct('民警遂依法对被告人王昇辉血液进行采集')
corrected_sent, detail = outcome
print(corrected_sent, detail)