def correct(sentence):
    """
    Correct the errors detected in a sentence.

    :param sentence: sentence text
    :return: corrected sentence, list of per-item correction details
        (documented upstream as (wrongs, rights, begin_idx, end_idx))
    """
    suspect_spans = get_sub_array(detect(sentence))

    # Build the lookup table: "comma-joined indices" -> suspect text.
    # All fragments are sliced from the sentence before any correction runs.
    suspects = {}
    for span in suspect_spans:
        start = span[0]
        # One index means a single character; otherwise slice a word up to
        # (not including) the last index.
        fragment = sentence[start] if len(span) == 1 else sentence[start:span[-1]]
        suspects[','.join(map(str, span))] = fragment

    # Correct each suspect character/word in turn, threading the updated
    # sentence through successive calls.
    corrections = []
    for key, fragment in suspects.items():
        sentence, change = _correct_item(sentence, key, fragment)
        if change:
            corrections.append(change)
    return sentence, corrections
def correct(sentence, param_ec=1.5, param_gd=2.5):
    """
    Correct a sentence in two passes: statistical first, then rule-based.

    :param sentence: sentence text
    :param param_ec: forwarded unchanged to correct_stat
        (NOTE(review): exact meaning not visible here, confirm against correct_stat)
    :param param_gd: forwarded unchanged to correct_stat
        (NOTE(review): exact meaning not visible here, confirm against correct_stat)
    :return: corrected sentence, combined list of details from both passes
    """
    # Detect suspicious positions and keep only the spans accepted by
    # get_valid_sub_array.
    spans = get_valid_sub_array(sentence, get_sub_array(detect(sentence)))

    # Turn each span into a ["begin,end", suspect_text] pair.
    suspect_chars = []
    for span in spans:
        begin_text = str(span[0])
        end_text = str(span[-1])
        suspect_chars.append(
            [','.join([begin_text, end_text]), sentence[span[0]: span[-1]]]
        )

    detail = []

    # Statistical correction.
    sentence, stat_changes = correct_stat(sentence, suspect_chars, param_ec, param_gd)
    detail.extend(stat_changes)

    # Rule-based correction, applied to the output of the statistical pass.
    sentence, rule_changes = correct_rule(sentence)
    detail.extend(rule_changes)

    return sentence, detail
def correct(sentence):
    """
    Correct the errors detected in a sentence.

    :param sentence: sentence text
    :return: corrected sentence, list of per-item correction details
        (documented upstream as (wrongs, rights, begin_idx, end_idx))
    """
    suspect_spans = get_sub_array(detect(sentence))

    # Build the lookup table: "comma-joined indices" -> suspect text.
    # All fragments are sliced from the sentence before any correction runs.
    suspects = {}
    for span in suspect_spans:
        start = span[0]
        # One index means a single character; otherwise slice a word up to
        # (not including) the last index.
        fragment = sentence[start] if len(span) == 1 else sentence[start:span[-1]]
        suspects[','.join(map(str, span))] = fragment

    # Correct each suspect character/word in turn, threading the updated
    # sentence through successive calls.
    corrections = []
    for key, fragment in suspects.items():
        sentence, change = _correct_item(sentence, key, fragment)
        if change:
            corrections.append(change)
    return sentence, corrections
# -*- coding: utf-8 -*- """ @author:XuMing([email protected]) @description: """ from pycorrector import detector # fixed idx_errors = detector.detect('vd') print(idx_errors)
# -*- coding: utf-8 -*- """ @author:XuMing([email protected]) @description: """ from pycorrector import detector idx_errors = detector.detect('少先队员因该为老人让坐') print(idx_errors)