예제 #1
0
def correct(sentence):
    """
    Correct the errors detected in a sentence.

    Every span produced by ``get_sub_array(detect(sentence))`` is turned into
    a comma-joined index key paired with the piece of the sentence it points
    at; each pair is then handed to ``_correct_item`` in insertion order.

    :param sentence: sentence text
    :return: corrected sentence, list(wrongs, rights, begin_idx, end_idx)
    """
    # Lookup table: "i" / "i,j,..." key -> suspicious character or word.
    suspects = {}
    for span in get_sub_array(detect(sentence)):
        key = ','.join(str(pos) for pos in span)
        if len(span) != 1:
            # Word: slice from the first listed index up to the last one.
            suspects[key] = sentence[span[0]:span[-1]]
        else:
            # Single character.
            suspects[key] = sentence[span[0]]

    corrections = []
    for key, fragment in suspects.items():
        # The fragments were cut from the sentence as passed in, while each
        # call receives the sentence as rewritten by the previous calls.
        sentence, change = _correct_item(sentence, key, fragment)
        if change:
            corrections.append(change)
    return sentence, corrections
예제 #2
0
def correct(sentence, param_ec=1.5, param_gd=2.5):
    """
    Correct a sentence with a statistical pass followed by a rule-based pass.

    :param sentence: sentence text
    :param param_ec: forwarded unchanged to ``correct_stat``
    :param param_gd: forwarded unchanged to ``correct_stat``
    :return: corrected sentence, combined detail list (statistical entries
        first, then rule-based entries)
    """
    # Detect suspicious spans, then keep what get_valid_sub_array returns.
    spans = get_valid_sub_array(sentence, get_sub_array(detect(sentence)))

    # Pair each span's "first,last" key with the text it slices out.
    suspect_chars = []
    for span in spans:
        first, last = span[0], span[-1]
        span_key = ','.join((str(first), str(last)))
        suspect_chars.append([span_key, sentence[first:last]])

    detail = []

    # Statistical correction works on the suspects found above.
    sentence, detail_stat = correct_stat(sentence, suspect_chars, param_ec, param_gd)
    detail.extend(detail_stat)

    # Rule-based correction runs on the already statistically corrected text.
    sentence, detail_rule = correct_rule(sentence)
    detail.extend(detail_rule)

    return sentence, detail
예제 #3
0
def correct(sentence):
    """
    Correct the errors detected in a sentence.

    Every span produced by ``get_sub_array(detect(sentence))`` is turned into
    a comma-joined index key paired with the piece of the sentence it points
    at; each pair is then handed to ``_correct_item`` in insertion order.

    :param sentence: sentence text
    :return: corrected sentence, list(wrongs, rights, begin_idx, end_idx)
    """
    # Lookup table: "i" / "i,j,..." key -> suspicious character or word.
    suspects = {}
    for span in get_sub_array(detect(sentence)):
        key = ','.join(str(pos) for pos in span)
        if len(span) != 1:
            # Word: slice from the first listed index up to the last one.
            suspects[key] = sentence[span[0]:span[-1]]
        else:
            # Single character.
            suspects[key] = sentence[span[0]]

    corrections = []
    for key, fragment in suspects.items():
        # The fragments were cut from the sentence as passed in, while each
        # call receives the sentence as rewritten by the previous calls.
        sentence, change = _correct_item(sentence, key, fragment)
        if change:
            corrections.append(change)
    return sentence, corrections
예제 #4
0
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Run pycorrector's detector on a short ASCII string and print the result.
"""

from pycorrector import detector

# fixed
# NOTE(review): "fixed" presumably marks this two-letter ASCII input as a
# previously failing case that now works -- confirm against project history.
idx_errors = detector.detect('vd')
# Print whatever detect() returned (presumably error indices, going by the
# variable name -- the return shape is not visible here).
print(idx_errors)
예제 #5
0
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Run pycorrector's detector on a sample Chinese sentence and print the result.
"""

from pycorrector import detector

# Sample sentence that appears to contain two common typos
# ("因该" for "应该", "坐" for "座"), used as input for detection.
idx_errors = detector.detect('少先队员因该为老人让坐')
# Print whatever detect() returned (presumably error indices, going by the
# variable name -- the return shape is not visible here).
print(idx_errors)