# -*- coding: utf-8 -*-
# Imports below are inferred from usage in this file; the package-local
# module paths (config, Tokenizer, utils, RNN LM, ccm config) are
# assumptions about the surrounding project layout.
import codecs
import os
import time

import kenlm
import numpy as np

from pycorrector import config                                # assumed path
from pycorrector.tokenizer import Tokenizer                   # assumed path
from pycorrector.utils.logger import logger                   # assumed path
from pycorrector.utils.text_utils import (uniform, is_alphabet_string,
                                          PUNCTUATION_LIST)   # assumed path
from pycorrector.rnn_lm.rnn_lm import LM                      # assumed path, RNN language model
import ccm_conf                                               # assumed path
from ccm_conf import LINK_WORD                                # assumed path

pwd_path = os.path.abspath(os.path.dirname(__file__))

# Error-type tags used by the first Detector below; the concrete values
# are assumptions.
error_type = {'confusion': 'confusion', 'word': 'word', 'char': 'char'}


class Detector(object):
    def __init__(self, language_model_path='',
                 word_freq_path='',
                 custom_word_freq_path='',
                 custom_confusion_path='',
                 person_name_path='',
                 place_name_path='',
                 stopwords_path=''):
        self.name = 'detector'
        self.language_model_path = os.path.join(pwd_path, language_model_path)
        self.word_freq_path = os.path.join(pwd_path, word_freq_path)
        self.custom_word_freq_path = os.path.join(pwd_path, custom_word_freq_path)
        self.custom_confusion_path = os.path.join(pwd_path, custom_confusion_path)
        self.person_name_path = os.path.join(pwd_path, person_name_path)
        self.place_name_path = os.path.join(pwd_path, place_name_path)
        self.stopwords_path = os.path.join(pwd_path, stopwords_path)
        self.is_char_error_detect = True
        self.is_word_error_detect = True
        self.initialized_detector = False

    def initialize_detector(self):
        t1 = time.time()
        self.lm = kenlm.Model(self.language_model_path)
        t2 = time.time()
        logger.debug('Loaded language model: %s, spend: %s s'
                     % (self.language_model_path, str(t2 - t1)))
        # word -> frequency dict
        self.word_freq = self.load_word_freq_dict(self.word_freq_path)
        t3 = time.time()
        logger.debug('Loaded word freq file: %s, size: %d, spend: %s s'
                     % (self.word_freq_path, len(self.word_freq), str(t3 - t2)))
        # custom confusion set
        self.custom_confusion = self._get_custom_confusion_dict(self.custom_confusion_path)
        t4 = time.time()
        logger.debug('Loaded confusion file: %s, size: %d, spend: %s s'
                     % (self.custom_confusion_path, len(self.custom_confusion), str(t4 - t3)))
        # custom segmentation dicts
        self.custom_word_freq = self.load_word_freq_dict(self.custom_word_freq_path)
        self.person_names = self.load_word_freq_dict(self.person_name_path)
        self.place_names = self.load_word_freq_dict(self.place_name_path)
        self.stopwords = self.load_word_freq_dict(self.stopwords_path)
        # merge the segmentation dict and the custom dicts
        self.custom_word_freq.update(self.person_names)
        self.custom_word_freq.update(self.place_names)
        self.custom_word_freq.update(self.stopwords)
        self.word_freq.update(self.custom_word_freq)
        t5 = time.time()
        logger.debug('Loaded custom word file: %s, size: %d, spend: %s s'
                     % (self.custom_word_freq_path, len(self.custom_word_freq), str(t5 - t4)))
        logger.debug('Loaded all word freq file done, size: %d' % len(self.word_freq))
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.custom_word_freq,
                                   custom_confusion_dict=self.custom_confusion)
        t6 = time.time()
        logger.info('Loaded dict ok, spend: %s s' % str(t6 - t1))
        self.initialized_detector = True

    def check_detector_initialized(self):
        if not self.initialized_detector:
            self.initialize_detector()

    @staticmethod
    def load_word_freq_dict(path):
        """
        Load a word segmentation dictionary.
        :param path:
        :return: dict, {word: freq}
        """
        word_freq = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#'):
                    continue
                info = line.split()
                if len(info) < 1:
                    continue
                word = info[0]
                # word frequency, default 1
                freq = int(info[1]) if len(info) > 1 else 1
                word_freq[word] = freq
        return word_freq

    def _get_custom_confusion_dict(self, path):
        """
        Load the custom confusion set.
        :param path:
        :return: dict, {variant: origin}, e.g. {"交通先行": "交通限行"}
        """
        confusion = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#'):
                    continue
                info = line.split()
                if len(info) < 2:
                    continue
                variant = info[0]
                origin = info[1]
                freq = int(info[2]) if len(info) > 2 else 1
                self.word_freq[origin] = freq
                confusion[variant] = origin
        return confusion

    def set_language_model_path(self, path):
        self.check_detector_initialized()
        self.lm = kenlm.Model(path)
        logger.info('Loaded language model: %s' % path)

    def set_custom_confusion_dict(self, path):
        self.check_detector_initialized()
        custom_confusion = self._get_custom_confusion_dict(path)
        self.custom_confusion.update(custom_confusion)
        logger.info('Loaded confusion path: %s, size: %d' % (path, len(custom_confusion)))

    def set_custom_word(self, path):
        self.check_detector_initialized()
        word_freqs = self.load_word_freq_dict(path)
        # merge dicts
        self.custom_word_freq.update(word_freqs)
        # merge the segmentation dict and the custom dict
        self.word_freq.update(self.custom_word_freq)
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.custom_word_freq,
                                   custom_confusion_dict=self.custom_confusion)
        for k, v in word_freqs.items():
            self.set_word_frequency(k, v)
        logger.info('Loaded custom word path: %s, size: %d' % (path, len(word_freqs)))

    def enable_char_error(self, enable=True):
        """
        Enable/disable char-level error detection.
        """
        self.is_char_error_detect = enable

    def enable_word_error(self, enable=True):
        """
        Enable/disable word-level error detection.
        """
        self.is_word_error_detect = enable

    def ngram_score(self, chars):
        """
        N-gram language-model score.
        :param chars: list, split by word or char
        """
        self.check_detector_initialized()
        return self.lm.score(' '.join(chars), bos=False, eos=False)

    def ppl_score(self, words):
        """
        Language-model perplexity; the lower, the more fluent the sentence.
        :param words: list, split by word or char
        """
        self.check_detector_initialized()
        return self.lm.perplexity(' '.join(words))

    def word_frequency(self, word):
        """
        Frequency of the word in the corpus.
        """
        self.check_detector_initialized()
        return self.word_freq.get(word, 0)

    def set_word_frequency(self, word, num):
        """
        Update the frequency of a word.
        """
        self.check_detector_initialized()
        self.word_freq[word] = num
        return self.word_freq

    @staticmethod
    def _check_contain_error(maybe_err, maybe_errors):
        """
        Check whether the error set (maybe_errors) already covers this
        error span (maybe_err).
        :param maybe_err: [error_word, begin_pos, end_pos, error_type]
        :param maybe_errors:
        """
        error_word_idx = 0
        begin_idx = 1
        end_idx = 2
        for err in maybe_errors:
            if maybe_err[error_word_idx] in err[error_word_idx] \
                    and maybe_err[begin_idx] >= err[begin_idx] \
                    and maybe_err[end_idx] <= err[end_idx]:
                return True
        return False

    def _add_maybe_error_item(self, maybe_err, maybe_errors):
        """
        Add a new suspected error.
        """
        if maybe_err not in maybe_errors and not self._check_contain_error(maybe_err, maybe_errors):
            maybe_errors.append(maybe_err)

    @staticmethod
    def _get_maybe_error_index(scores, ratio=0.6745, threshold=1.4):
        """
        Locate suspected wrong chars via median absolute deviation (MAD).
        :param scores: np.array
        :param threshold: the smaller the threshold, the more suspects returned
        :return: list of indices of all suspected wrong chars
        """
        scores = np.array(scores)
        if len(scores.shape) == 1:
            scores = scores[:, None]
        median = np.median(scores, axis=0)  # median of all scores
        margin_median = np.sqrt(np.sum((scores - median) ** 2, axis=-1))  # deviation from the median
        # median absolute deviation
        med_abs_deviation = np.median(margin_median)
        if med_abs_deviation == 0:
            return []
        y_score = ratio * margin_median / med_abs_deviation
        # flatten
        scores = scores.flatten()
        # indices of all suspected wrong chars
        maybe_error_indices = np.where((y_score > threshold) & (scores < median))
        return list(maybe_error_indices[0])

    def detect(self, sentence):
        """
        Detect suspected errors in the sentence: word, position, error type.
        :param sentence:
        :return: [error_word, begin_pos, end_pos, error_type]
        """
        maybe_errors = []
        if not sentence.strip():
            return maybe_errors
        self.check_detector_initialized()
        # text normalization
        sentence = uniform(sentence)
        # word segmentation
        tokens = self.tokenizer.tokenize(sentence)
        # custom confusion words are treated as suspected errors
        for confuse in self.custom_confusion:
            idx = sentence.find(confuse)
            if idx > -1:
                maybe_err = [confuse, idx, idx + len(confuse), error_type["confusion"]]
                self._add_maybe_error_item(maybe_err, maybe_errors)
        if self.is_word_error_detect:
            # out-of-vocabulary words are treated as suspected errors
            for word, begin_idx, end_idx in tokens:
                # pass blank
                if not word.strip():
                    continue
                # pass punctuation
                if word in PUNCTUATION_LIST:
                    continue
                # pass number
                if word.isdigit():
                    continue
                # pass alphabetic string
                if is_alphabet_string(word.lower()):
                    continue
                # pass words already in the dict
                if word in self.word_freq:
                    continue
                maybe_err = [word, begin_idx, end_idx, error_type["word"]]
                self._add_maybe_error_item(maybe_err, maybe_errors)
        if self.is_char_error_detect:
            # the language model detects suspected wrong chars
            ngram_avg_scores = []
            try:
                for n in [2, 3]:
                    scores = []
                    for i in range(len(sentence) - n + 1):
                        word = sentence[i:i + n]
                        score = self.ngram_score(list(word))
                        scores.append(score)
                    if not scores:
                        continue
                    # pad the sliding-window scores at both ends
                    for _ in range(n - 1):
                        scores.insert(0, scores[0])
                        scores.append(scores[-1])
                    avg_scores = [sum(scores[i:i + n]) / len(scores[i:i + n])
                                  for i in range(len(sentence))]
                    ngram_avg_scores.append(avg_scores)
                # average the n-gram scores over n = 2, 3
                sent_scores = list(np.average(np.array(ngram_avg_scores), axis=0))
                # collect suspected wrong chars
                for i in self._get_maybe_error_index(sent_scores):
                    maybe_err = [sentence[i], i, i + 1, error_type["char"]]
                    self._add_maybe_error_item(maybe_err, maybe_errors)
            except IndexError as ie:
                logger.warning("index error, sentence:" + sentence + str(ie))
            except Exception as e:
                logger.warning("detect error, sentence:" + sentence + str(e))
        return sorted(maybe_errors, key=lambda k: k[1])
# Error-type tags for the revised Detector below; a minimal stand-in
# definition, the concrete values are assumptions.
class ErrorType(object):
    confusion = 'confusion'
    word = 'word'
    char = 'char'


# Later revision of the Detector above: default paths come from config, and
# an optional RNN language model (enable_rnnlm=True) can replace kenlm.
class Detector(object):
    def __init__(self, language_model_path=config.language_model_path,
                 word_freq_path=config.word_freq_path,
                 custom_word_freq_path=config.custom_word_freq_path,
                 custom_confusion_path=config.custom_confusion_path,
                 person_name_path=config.person_name_path,
                 place_name_path=config.place_name_path,
                 stopwords_path=config.stopwords_path,
                 enable_rnnlm=False,
                 rnnlm_vocab_path=config.rnnlm_vocab_path,
                 rnnlm_model_dir=config.rnnlm_model_dir):
        self.name = 'detector'
        self.language_model_path = language_model_path
        self.word_freq_path = word_freq_path
        self.custom_word_freq_path = custom_word_freq_path
        self.custom_confusion_path = custom_confusion_path
        self.person_name_path = person_name_path
        self.place_name_path = place_name_path
        self.stopwords_path = stopwords_path
        self.is_char_error_detect = True
        self.is_word_error_detect = True
        self.initialized_detector = False
        self.enable_rnnlm = enable_rnnlm
        self.rnnlm_vocab_path = rnnlm_vocab_path
        self.rnnlm_model_dir = rnnlm_model_dir

    def initialize_detector(self):
        t1 = time.time()
        if self.enable_rnnlm:
            self.lm = LM(self.rnnlm_model_dir, self.rnnlm_vocab_path)
            logger.debug('Loaded language model: %s, spend: %s s'
                         % (self.rnnlm_model_dir, str(time.time() - t1)))
        else:
            try:
                import kenlm
            except ImportError:
                raise ImportError('pycorrector dependencies are not fully installed; '
                                  'kenlm is required for the statistical language model. '
                                  'Please run "pip install kenlm" (not supported on Windows); '
                                  'on Windows, install tensorflow and set enable_rnnlm=True.')
            self.lm = kenlm.Model(self.language_model_path)
            logger.debug('Loaded language model: %s, spend: %s s'
                         % (self.language_model_path, str(time.time() - t1)))
        # word -> frequency dict
        t2 = time.time()
        self.word_freq = self.load_word_freq_dict(self.word_freq_path)
        t3 = time.time()
        logger.debug('Loaded word freq file: %s, size: %d, spend: %s s'
                     % (self.word_freq_path, len(self.word_freq), str(t3 - t2)))
        # custom confusion set
        self.custom_confusion = self._get_custom_confusion_dict(self.custom_confusion_path)
        t4 = time.time()
        logger.debug('Loaded confusion file: %s, size: %d, spend: %s s'
                     % (self.custom_confusion_path, len(self.custom_confusion), str(t4 - t3)))
        # custom segmentation dicts
        self.custom_word_freq = self.load_word_freq_dict(self.custom_word_freq_path)
        self.person_names = self.load_word_freq_dict(self.person_name_path)
        self.place_names = self.load_word_freq_dict(self.place_name_path)
        self.stopwords = self.load_word_freq_dict(self.stopwords_path)
        # merge the segmentation dict and the custom dicts
        self.custom_word_freq.update(self.person_names)
        self.custom_word_freq.update(self.place_names)
        self.custom_word_freq.update(self.stopwords)
        self.word_freq.update(self.custom_word_freq)
        t5 = time.time()
        logger.debug('Loaded custom word file: %s, size: %d, spend: %s s'
                     % (self.custom_word_freq_path, len(self.custom_word_freq), str(t5 - t4)))
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.custom_word_freq,
                                   custom_confusion_dict=self.custom_confusion)
        t6 = time.time()
        logger.info('Loaded dict ok, spend: %s s' % str(t6 - t1))
        self.initialized_detector = True

    def check_detector_initialized(self):
        if not self.initialized_detector:
            self.initialize_detector()

    @staticmethod
    def load_word_freq_dict(path):
        """
        Load a word segmentation dictionary.
        :param path:
        :return: dict, {word: freq}
        """
        word_freq = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#'):
                    continue
                info = line.split()
                if len(info) < 1:
                    continue
                word = info[0]
                # word frequency, default 1
                freq = int(info[1]) if len(info) > 1 else 1
                word_freq[word] = freq
        return word_freq

    def _get_custom_confusion_dict(self, path):
        """
        Load the custom confusion set.
        :param path:
        :return: dict, {variant: origin}, e.g. {"交通先行": "交通限行"}
        """
        confusion = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#'):
                    continue
                info = line.split()
                if len(info) < 2:
                    continue
                variant = info[0]
                origin = info[1]
                freq = int(info[2]) if len(info) > 2 else 1
                self.word_freq[origin] = freq
                confusion[variant] = origin
        return confusion

    def set_language_model_path(self, path):
        self.check_detector_initialized()
        import kenlm
        self.lm = kenlm.Model(path)
        logger.info('Loaded language model: %s' % path)

    def set_custom_confusion_dict(self, path):
        self.check_detector_initialized()
        custom_confusion = self._get_custom_confusion_dict(path)
        self.custom_confusion.update(custom_confusion)
        logger.info('Loaded confusion path: %s, size: %d' % (path, len(custom_confusion)))

    def set_custom_word(self, path):
        self.check_detector_initialized()
        word_freqs = self.load_word_freq_dict(path)
        # merge dicts
        self.custom_word_freq.update(word_freqs)
        # merge the segmentation dict and the custom dict
        self.word_freq.update(self.custom_word_freq)
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.custom_word_freq,
                                   custom_confusion_dict=self.custom_confusion)
        for k, v in word_freqs.items():
            self.set_word_frequency(k, v)
        logger.info('Loaded custom word path: %s, size: %d' % (path, len(word_freqs)))

    def enable_char_error(self, enable=True):
        """
        Enable/disable char-level error detection.
        """
        self.is_char_error_detect = enable

    def enable_word_error(self, enable=True):
        """
        Enable/disable word-level error detection.
        """
        self.is_word_error_detect = enable

    def ngram_score(self, chars):
        """
        N-gram language-model score.
        :param chars: list, split by word or char
        """
        self.check_detector_initialized()
        return self.lm.score(' '.join(chars), bos=False, eos=False)

    def char_scores(self, chars):
        """
        Per-char scores from the RNN language model.
        :param chars: list, split by char
        :return: scores, list
        """
        self.check_detector_initialized()
        return self.lm.char_scores(chars)

    def ppl_score(self, words):
        """
        Language-model perplexity; the lower, the more fluent the sentence.
        :param words: list, split by word or char
        """
        self.check_detector_initialized()
        return self.lm.perplexity(' '.join(words))

    def word_frequency(self, word):
        """
        Frequency of the word in the corpus.
        """
        self.check_detector_initialized()
        return self.word_freq.get(word, 0)

    def set_word_frequency(self, word, num):
        """
        Update the frequency of a word.
        """
        self.check_detector_initialized()
        self.word_freq[word] = num
        return self.word_freq

    @staticmethod
    def _check_contain_error(maybe_err, maybe_errors):
        """
        Check whether the error set (maybe_errors) already covers this
        error span (maybe_err).
        :param maybe_err: [error_word, begin_pos, end_pos, error_type]
        :param maybe_errors:
        """
        error_word_idx = 0
        begin_idx = 1
        end_idx = 2
        for err in maybe_errors:
            if maybe_err[error_word_idx] in err[error_word_idx] \
                    and maybe_err[begin_idx] >= err[begin_idx] \
                    and maybe_err[end_idx] <= err[end_idx]:
                return True
        return False

    def _add_maybe_error_item(self, maybe_err, maybe_errors):
        """
        Add a new suspected error.
        """
        if maybe_err not in maybe_errors and not self._check_contain_error(maybe_err, maybe_errors):
            maybe_errors.append(maybe_err)

    @staticmethod
    def _get_maybe_error_index(scores, ratio=0.6745, threshold=1.4):
        """
        Locate suspected wrong chars via median absolute deviation (MAD).
        :param scores: np.array
        :param threshold: the smaller the threshold, the more suspects returned
        :return: list of indices of all suspected wrong chars
        """
        result = []
        scores = np.array(scores)
        if len(scores.shape) == 1:
            scores = scores[:, None]
        median = np.median(scores, axis=0)  # median of all scores
        margin_median = np.sqrt(np.sum((scores - median) ** 2, axis=-1))  # deviation from the median
        # median absolute deviation
        med_abs_deviation = np.median(margin_median)
        if med_abs_deviation == 0:
            return result
        y_score = ratio * margin_median / med_abs_deviation
        # flatten
        scores = scores.flatten()
        # indices of all suspected wrong chars
        maybe_error_indices = np.where((y_score > threshold) & (scores < median))
        result = list(maybe_error_indices[0])
        return result

    @staticmethod
    def _get_maybe_error_index_by_rnnlm(scores, n=3):
        """
        Locate suspected wrong chars: points within n standard deviations
        of the mean are treated as normal.
        :param scores: list, float
        :param n: the smaller n is, the more suspects are returned
        :return: list of indices of all suspected wrong chars
        """
        scores = np.array(scores)  # needed for the vectorized comparison below
        std = np.std(scores, ddof=1)
        mean = np.mean(scores)
        down_limit = mean - n * std
        upper_limit = mean + n * std
        # indices of all suspected wrong chars
        maybe_error_indices = np.where((scores > upper_limit) | (scores < down_limit))
        result = list(maybe_error_indices[0])
        return result

    @staticmethod
    def is_filter_token(token):
        result = False
        # pass blank
        if not token.strip():
            result = True
        # pass punctuation
        if token in PUNCTUATION_LIST:
            result = True
        # pass number
        if token.isdigit():
            result = True
        # pass alphabetic string
        if is_alphabet_string(token.lower()):
            result = True
        return result

    def detect(self, sentence):
        """
        Detect suspected errors in the sentence: word, position, error type.
        :param sentence:
        :return: list[list], [error_word, begin_pos, end_pos, error_type]
        """
        maybe_errors = []
        if not sentence.strip():
            return maybe_errors
        # initialization
        self.check_detector_initialized()
        # text normalization
        sentence = uniform(sentence)
        # word segmentation
        tokens = self.tokenizer.tokenize(sentence)
        # custom confusion words are treated as suspected errors
        for confuse in self.custom_confusion:
            idx = sentence.find(confuse)
            if idx > -1:
                maybe_err = [confuse, idx, idx + len(confuse), ErrorType.confusion]
                self._add_maybe_error_item(maybe_err, maybe_errors)
        if self.is_word_error_detect:
            # out-of-vocabulary words are treated as suspected errors
            for word, begin_idx, end_idx in tokens:
                # pass filtered tokens
                if self.is_filter_token(word):
                    continue
                # pass words already in the dict
                if word in self.word_freq:
                    continue
                maybe_err = [word, begin_idx, end_idx, ErrorType.word]
                self._add_maybe_error_item(maybe_err, maybe_errors)
        if self.is_char_error_detect:
            # the language model detects suspected wrong chars
            if self.enable_rnnlm:
                scores = self.char_scores(sentence)
                # collect suspected wrong chars
                for i in self._get_maybe_error_index_by_rnnlm(scores):
                    token = sentence[i]
                    # pass filtered tokens
                    if self.is_filter_token(token):
                        continue
                    # token, begin_idx, end_idx, error_type
                    maybe_err = [token, i, i + 1, ErrorType.char]
                    self._add_maybe_error_item(maybe_err, maybe_errors)
            else:
                try:
                    ngram_avg_scores = []
                    for n in [2, 3]:
                        scores = []
                        for i in range(len(sentence) - n + 1):
                            word = sentence[i:i + n]
                            score = self.ngram_score(list(word))
                            scores.append(score)
                        if not scores:
                            continue
                        # pad the sliding-window scores at both ends
                        for _ in range(n - 1):
                            scores.insert(0, scores[0])
                            scores.append(scores[-1])
                        avg_scores = [sum(scores[i:i + n]) / len(scores[i:i + n])
                                      for i in range(len(sentence))]
                        ngram_avg_scores.append(avg_scores)
                    # average the n-gram scores over n = 2, 3
                    sent_scores = list(np.average(np.array(ngram_avg_scores), axis=0))
                    # collect suspected wrong chars
                    for i in self._get_maybe_error_index(sent_scores):
                        token = sentence[i]
                        # pass filtered tokens
                        if self.is_filter_token(token):
                            continue
                        # token, begin_idx, end_idx, error_type
                        maybe_err = [token, i, i + 1, ErrorType.char]
                        self._add_maybe_error_item(maybe_err, maybe_errors)
                except IndexError as ie:
                    logger.warning("index error, sentence:" + sentence + str(ie))
                except Exception as e:
                    logger.warning("detect error, sentence:" + sentence + str(e))
        return sorted(maybe_errors, key=lambda k: k[1])
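
As a quick check of the MAD rule in `_get_maybe_error_index`, a minimal sketch with made-up per-char n-gram scores (the numbers are illustrative; only the clearly low outlier at index 3 should be flagged):

# Hypothetical char-level LM scores; index 3 deviates far from the
# median (-5.25) and lies below it, so it is the only suspect returned.
scores = [-5.1, -5.3, -5.0, -9.8, -5.2, -5.4]
print(Detector._get_maybe_error_index(scores))  # flags index 3
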
class NameSort(object):
    def __init__(self, word_freq_path=config.word_freq_path,
                 name_sort_path=ccm_conf.name_sort_path,
                 leader_job_path=ccm_conf.leader_job_path,
                 leader_job_freq_dict_path=ccm_conf.leader_job_freq_dict_path):
        self.leader_job_freq_dict = Detector.load_word_freq_dict(leader_job_freq_dict_path)
        self.word_freq_path = word_freq_path
        logger.debug(self.leader_job_freq_dict)
        self.tokenizer = Tokenizer(dict_path=self.word_freq_path,
                                   custom_word_freq_dict=self.leader_job_freq_dict)
        self.name_sort_path = name_sort_path
        self.leader_job_path = leader_job_path

    def is_filter_token(self, token):
        result = False
        # pass blank
        if not token.strip():
            result = True
        # pass punctuation
        if token in PUNCTUATION_LIST:
            result = True
        # pass number
        if token.isdigit():
            result = True
        # pass alphabetic string
        if is_alphabet_string(token.lower()):
            result = True
        return result

    def load_ccm_word_freq_dict(self, path):
        """
        Load the name-order dictionary.
        :param path:
        :return: dict, {name: rank}
        """
        word_freq = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#'):
                    continue
                info = line.split('+')
                if len(info) < 1:
                    continue
                word = info[0]  # word is a person name
                # rank: info[1] gives the protocol order when present, else 1
                freq = int(info[1]) if len(info) > 1 else 1
                word_freq[word] = freq
        return word_freq

    def load_ccm_job_freq_dict(self, path):
        """
        Load the name-to-job dictionary.
        :param path:
        :return: dict, {name: {'1': jobs, '2': honorifics}}
        """
        word_freq = {}
        with codecs.open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('#') or not line:
                    continue
                info = line.split(':')
                if len(info) < 2:
                    continue
                word = info[0]  # person name, e.g. 习近平
                job = info[1]   # jobs plus honorifics
                s1 = job.split('?')
                if len(s1) > 1:
                    s2 = s1[0].split('、')  # jobs, separated by 、
                    s3 = s1[1].split('、')  # honorifics, separated by 、
                    b = {'1': s2, '2': s3}
                else:
                    s2 = s1[0].split('、')  # jobs, separated by 、
                    b = {'1': s2}
                word_freq[word] = b
        return word_freq

    def ccm_sort(self, sentence):
        """
        Reorder leader names in the sentence by their protocol rank.
        """
        # load the name-order dictionary
        name_model = self.load_ccm_word_freq_dict(self.name_sort_path)
        maybe_errors = []
        if not sentence.strip():
            return maybe_errors
        # text normalization
        sentence = uniform(sentence)
        # word segmentation
        tokens = self.tokenizer.tokenize(sentence)
        logger.debug(tokens)
        temp = None
        error_list = []
        correct_list = []
        new = []
        for i, (word, begin_idx, end_idx) in enumerate(tokens):
            new.append(word)
            if word in LINK_WORD:
                temp = None
            if name_model.get(word):
                if not temp:
                    temp = name_model.get(word)
                    continue
                if temp > name_model.get(word):
                    # the earlier name ranks lower: swap the two names
                    tokens[i], tokens[i - 2] = tokens[i - 2], tokens[i]
                    correct_list.append((tokens[i][0], i))
                    correct_list.append((tokens[i - 2][0], i - 2))
                    error_list.append((tokens[i][0], i))
        for word, p in correct_list:
            new[p] = word
        # `correct` holds the reordered sentence; maybe_errors stays empty here
        correct = ''.join(new)
        logger.debug('correct_list: %s' % str(correct_list))
        logger.debug('correct: %s' % correct)
        return sorted(maybe_errors, key=lambda k: k[1])

    def name_job(self, sentence):
        """
        Check whether the jobs preceding each leader name match the
        name-to-job dictionary.
        """
        # load the name-to-job dictionary
        job_model = self.load_ccm_job_freq_dict(self.leader_job_path)
        logger.debug(job_model)
        maybe_errors = []
        if not sentence.strip():
            return maybe_errors
        # text normalization
        sentence = uniform(sentence)
        # word segmentation
        tokens = self.tokenizer.tokenize(sentence)
        logger.debug(tokens)
        error_list = []
        j = 0
        for i, (word, begin_idx, end_idx) in enumerate(tokens):
            if job_model.get(word):
                # a name was found; i is the index of that name
                a = job_model.get(word)
                front = a.get('1')
                temp_list = []
                for x in range(j, i):  # j is the start index, i the end index
                    if self.leader_job_freq_dict.get(tokens[x][0]):
                        if tokens[x][0] not in front:
                            temp_list.append(tokens[x][0])
                if temp_list:
                    error_list.append({word: temp_list})
                j = i + 1  # the start index moves past the current name
        logger.debug(error_list)
        return error_list
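
The two dictionaries consumed by NameSort are plain-text files whose formats follow from the parsers above. A hypothetical example of each (the names, ranks, and jobs are illustrative only):

# name_sort file: one "name+rank" entry per line; a smaller rank should
# appear earlier in a sentence (see the swap logic in ccm_sort).
习近平+1
李克强+2

# leader_job file: "name:jobs?honorifics", items separated by 、
# (load_ccm_job_freq_dict splits on ':', then '?', then '、').
习近平:中共中央总书记、国家主席?总书记

With files like these on the assumed ccm_conf paths, a minimal (hypothetical) run:

ns = NameSort()
ns.ccm_sort('李克强、习近平出席了会议')   # reorders names that appear out of rank order
ns.name_job('国家主席习近平发表讲话')    # flags preceding jobs that do not match the dict
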