Esempio n. 1
0
def main(**kwargs):
    """
    Correct a text file line by line and write the results to another file.

    Every line of the input file is stripped of surrounding whitespace and
    passed to ``pycorrector.correct``. One tab-separated record is written per
    input line: the stripped original, the corrected sentence and, when
    ``detail`` is truthy, ``str()`` of the second value returned by
    ``pycorrector.correct``.

    :param kwargs: keyword options

        * ``input`` (required): path of the file to read; opened as utf-8
        * ``output`` (required): path of the file to write; opened as utf-8
        * ``no_char`` (optional, default False): when truthy, calls
          ``pycorrector.enable_char_error(enable=False)`` before correcting
        * ``detail`` (optional, default False): when truthy, appends the
          correction details as an extra column
    :return: None
    :raises KeyError: if ``input`` or ``output`` is not supplied
    """
    if kwargs.get('no_char', False):
        pycorrector.enable_char_error(enable=False)
        print('disable char error detect.')

    detail = kwargs.get('detail', False)
    # Stays 0 when the input file has no lines (the loop never rebinds it).
    count = 0
    with open(kwargs['input'], 'r', encoding='utf-8') as fr, \
            open(kwargs['output'], 'w', encoding='utf-8') as fw:
        for count, line in enumerate(fr, start=1):
            line = line.strip()
            corrected_sent, info = pycorrector.correct(line)
            columns = [line, corrected_sent]
            if detail:
                columns.append(str(info))
            fw.write('\t'.join(columns) + '\n')
        print('{} lines in output'.format(count))
Esempio n. 2
0
def test_brand():
    """Demonstrate brand-name correction with char-level error detection off.

    Each sample sentence holds a misspelled brand name; the sentence and the
    value returned by ``pycorrector.correct`` are printed.
    """
    template = "original sentence:{} => correct sentence:{}"
    samples = (
        '买衣服就到拼哆哆',  # intended brand: 拼多多
        '这个特仑素牛奶喝起来还不错吧',  # intended brand: 特仑苏
    )

    pycorrector.enable_char_error(enable=False)
    for sentence in samples:
        print(template.format(sentence, pycorrector.correct(sentence)))
Esempio n. 3
0
def test_suyu():
    """Demonstrate correction of common sayings with char-level detection off.

    Each sample sentence holds a saying with a wrong character; the sentence
    and the value returned by ``pycorrector.correct`` are printed.
    """
    template = "original sentence:{} => correct sentence:{}"
    samples = (
        '这衣服买给她吧,也是肥水步流外人田',  # intended saying: 肥水不流外人田
        '这么多字让他写也是赶鸭子打架',  # intended saying: 赶鸭子上架
    )

    pycorrector.enable_char_error(enable=False)
    for sentence in samples:
        print(template.format(sentence, pycorrector.correct(sentence)))
Esempio n. 4
0
def test_disease():
    """Demonstrate correction of medical terms with char-level detection off.

    Each sample sentence holds a misspelled drug name; the sentence and the
    value returned by ``pycorrector.correct`` are printed.
    """
    template = "original sentence:{} => correct sentence:{}"
    samples = (
        '这个新药奥美砂坦脂片能治疗心绞痛,效果还可以',  # intended name: 奥美沙坦酯片
        '有个药名叫硫酸氢录吡各雷片能治疗高血压',  # intended name: 硫酸氢氯吡格雷片
    )

    pycorrector.enable_char_error(enable=False)
    for sentence in samples:
        print(template.format(sentence, pycorrector.correct(sentence)))
Esempio n. 5
0
def test_chengyu():
    """Demonstrate idiom (chengyu) correction.

    Two sentences are corrected and printed with char-level error detection
    disabled. A longer list of idiom sentences is then run through both
    ``pycorrector.detect`` and ``pycorrector.correct`` twice: once with
    char-level detection disabled and once with it enabled.
    """
    template = "original sentence:{} => correct sentence:{}"

    def report(sentences):
        # Print the detect() and correct() results for every sentence.
        for sentence in sentences:
            print(sentence, pycorrector.detect(sentence))
            print(sentence, pycorrector.correct(sentence))

    pycorrector.enable_char_error(enable=False)
    warmup = (
        '这块名表带带相传',  # intended idiom: 代代相传
        '他贰话不说把牛奶喝完了',  # intended idiom: 二话不说
    )
    for sentence in warmup:
        print(template.format(sentence, pycorrector.correct(sentence)))

    # Reference sentences; the character in double parentheses appears to be
    # the intended replacement for the character just before it:
    #   这家伙还蛮格((恪))尽职守的。
    #   报应接中迩((而))来。
    #   人群穿((川))流不息。
    #   这个消息不径((胫))而走。
    #   眼前的场景美仑((轮))美幻简直超出了人类的想象。
    #   看着这两个人谈笑风声((生))我心理((里))不由有些忌妒。
    #   有了这一番旁证((征))博引。
    idiom_cases = [
        '这家伙还蛮格尽职守的',
        '报应接中迩来',  # intended idiom: 接踵而来
        '人群穿流不息',
        '这个消息不径而走',
        '这个消息不胫儿走',
        '眼前的场景美仑美幻简直超出了人类的想象',
        '看着这两个人谈笑风声我心理不由有些忌妒',
        '有了这一番旁证博引',
        '有了这一番旁针博引',
    ]

    # First pass: char-level detection is still disabled.
    report(idiom_cases)

    # Second pass: same sentences with char-level detection turned back on.
    pycorrector.enable_char_error(enable=True)
    print("-" * 42)
    report(idiom_cases)
Esempio n. 6
0
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: 
"""

import sys

sys.path.append("../")

import pycorrector

if __name__ == '__main__':
    # Turn off char-level error detection before running the correction.
    pycorrector.enable_char_error(enable=False)

    # Sample sentence to run through the corrector.
    sentence = '我的喉咙发炎了要买点阿莫细林吃'
    result = pycorrector.correct(sentence)

    message = "original sentence:{} => correct sentence:{}".format(sentence, result)
    print(message)