def entityRecognize(word_list, question):
    """Recognize candidate entity mentions in *question*.

    Builds every multi-character contiguous n-gram from *word_list*, expands
    each with aliases from ``mention2entity_dic``, and keeps those n-grams /
    aliases that match at least one knowledge-base entity.  Afterwards, a
    shorter mention contained in a longer one is dropped unless one of its
    one-hop relation names is similar to the rest of the question.

    Fixes vs. previous revision: removed the unused ``finalentity`` local;
    the pruning pass now iterates over a snapshot of ``entity_list`` (the old
    code removed items from the list it was iterating, which silently skipped
    the element following each removal); relation scanning stops early once a
    match is found.

    :param word_list: segmented words of the question, in order.
    :param question: the original question string.
    :return: list of surviving entity-name candidates (may contain duplicates).
    """
    entity_list = []
    for word in word_list:
        entity = ""
        # NOTE(review): word_list.index(word) finds the FIRST occurrence, so a
        # repeated word restarts n-gram building from its first position —
        # confirm this is intended for questions containing duplicate words.
        for temp_entity in word_list[word_list.index(word):]:
            entity = entity + temp_entity
            all_entity = [entity]
            if len(entity) > 1:
                if entity in mention2entity_dic:  # the mention has known aliases
                    for alias in mention2entity_dic[entity]:
                        all_entity.append(alias)
                for en in all_entity:
                    same_name_entity_list = ccksNeo.get_entity_list_by_name(en)
                    extra_name = ccksNeo.get_entity_info_by_name(en)
                    # Collect alternative names from attributes ending in
                    # '名'/'称', skipping English/foreign-name attributes.
                    for name in extra_name:
                        if name[0][-1] == '名' or name[0][-1] == '称':
                            if len(name[1]) > 1:
                                if name[0] != '英文名' and name[0] != '英文名称' and name[0] != '外文名' and name[0] != '外文名称':
                                    entity_list.append(name[1])
                    if len(same_name_entity_list) >= 1:
                        entity_list.append(en)
    # If a short mention is contained in a longer one, keep it only when one
    # of its one-hop relation names matches the question remainder.
    # Iterate over a snapshot so in-loop removals cannot skip elements.
    for entity1 in list(entity_list):
        temp = question
        for ch in entity1:  # strip the mention's characters from the question
            if ch in question:
                temp = temp.replace(ch, "")
        for entity2 in entity_list:
            if entity1 != entity2 and entity1 in entity2:
                same_name_entity_list = ccksNeo.get_entity_list_by_name(entity1)
                flag = 0
                for entitydict in same_name_entity_list:
                    relations = ccksNeo.get_related_entities_by_id(entitydict['id'])
                    for relation in relations:
                        score = serviceWord2vec.get_similarity(list(jieba.cut(temp)), list(jieba.cut(relation['name'])))
                        if score > 0.2:
                            flag = 1
                            break  # one related relation is enough to keep it
                    if flag:
                        break
                if flag == 0 and entity1 in entity_list:
                    entity_list.remove(entity1)
    print("entity_list", entity_list)
    return entity_list
def entityRecognize(word_list):
    """Recognize entity mentions by matching question n-grams against the KB.

    NOTE(review): this looks like a superseded version of the two-argument
    ``entityRecognize`` defined earlier in this file — it reads the names
    ``entity_list`` and ``sentence`` which are not defined in this function
    (presumably module-level globals; confirm), and it returns nothing.
    Verify it is dead code before relying on it.
    """
    for word in word_list:
        entity = ""
        finalentity = ""  # never used in this function
        for temp_entity in word_list[word_list.index(word):]:
            entity = entity + temp_entity
            if len(entity) > 1:
                # print(entity)
                print(1)
                same_name_entity_list = ccksNeo.get_entity_list_by_name(entity)
                if len(same_name_entity_list) >= 1:
                    entity_list.append(entity)
    # print(entity_list)
    # If a short mention is contained in a longer one, inspect the short
    # mention's one-hop relation names before keeping it.
    for entity1 in entity_list:
        for entity2 in entity_list:
            if entity1 != entity2 and entity1 in entity2:
                temp_list = sentence.replace(entity1, "")
                segmentor1 = Segmentor()
                segmentor1.load("./ltpdata/ltp_data_v3.4.0/cws.model")
                temp_list = segmentor1.segment(temp_list)
                segmentor1.release()
                print(2)
                same_name_entity_list = ccksNeo.get_entity_list_by_name(entity1)
                flag = 0
                for entitydict in same_name_entity_list:
                    print(entitydict, "用id查")
                    print(3)
                    relations = ccksNeo.get_related_entities_by_neoid(entitydict['id'])
                    # print(relations)
                    # compare each relation name to the sentence remainder
                    for relation in relations:
                        # print(temp_list, relation['name'])
                        '''
                        segmentor2 = Segmentor()
                        segmentor2.load("./ltpdata/ltp_data_v3.4.0/cws.model")
                        #print("测试处", relation['name'])
                        relation_list = segmentor2.segment(relation['name'])
                        segmentor2.release()
                        '''
                        # print(temp_list)
                        # print(relation_list)
                        score = serviceWord2vec.get_similarity(temp_list, list(jieba.cut(relation['name'])))
                        # print("测试分数", score)
                        if score > 0.2:
                            flag = 1
                if flag == 0 and entity1 in entity_list:
                    # NOTE(review): removing from entity_list while iterating
                    # it skips elements; the newer version should snapshot.
                    # print(entity_list)
                    # print(entity1)
                    entity_list.remove(entity1)
    print("entity_list",entity_list)
def get_realtion_info(relation_candidate, remain_sentence):
    """Build similarity features for each candidate relation.

    For every candidate (shaped ``[name, relation, target_entity,
    target_entity_keyid]``) this computes a Jaccard distance, an edit-distance
    ratio and a word2vec similarity between the relation and the remaining
    question text.

    NOTE(review): the ``relation_info.append(...)`` call below is commented
    out, so this function currently always returns an empty list — confirm
    whether that is intentional.
    NOTE(review): ``edit`` is computed against the module-level ``question``,
    not against *remain_sentence* — verify which is intended.

    :param relation_candidate: candidate rows [name, relation, target_entity, target_entity_keyid].
    :param remain_sentence: question text left after removing the entity mention.
    :return: list of feature rows (currently always empty, see note above).
    """
    # temp_relations = ccksNeo.get_entity_info_by_keyid(entity_keyid)  # the entity's info
    # entity name, path, target entity
    # print(temp_relations)
    relation_info = []
    for candidate in relation_candidate:
        # for key, value in temp_relations.items():  # path name, target entity
        # NOTE(review): the LTP segmentation model is re-loaded on EVERY
        # iteration — expensive; hoisting it out of the loop looks safe.
        segmentor1 = Segmentor()
        segmentor1.load("./ltpdata/ltp_data_v3.4.0/cws.model")
        temp = list(segmentor1.segment(remain_sentence))
        segmentor1.release()
        # drop words of the candidate's entity name from the remainder
        guanxideci = jieba.cut(candidate[0])
        for word in guanxideci:
            if word in model and word in temp:
                temp.remove(word)
        '''
        segmentor2 = Segmentor()
        segmentor2.load("./ltpdata/ltp_data_v3.4.0/cws.model")
        temp2 = list(segmentor2.segment(candidate[1]))
        segmentor2.release()
        '''
        ##################jaccard
        # Jaccard distance between remainder words and the raw relation string
        temp2 = [candidate[1]]
        set1 = set(temp)
        set2 = set(temp2)
        jaccard = jaccard_distance(set1, set2)
        edit = difflib.SequenceMatcher(None, question, candidate[1]).ratio()
        print(temp, temp2)
        w2v = serviceWord2vec.get_similarity(temp, list(jieba.cut(candidate[1])))
        '''
        if key == c_relation_name:
            is_correct = 1
        else:
            is_correct = 0
        '''
        # relation_info.append([candidate[0], candidate[1], candidate[2], candidate[3], jaccard, edit, w2v])
        # entity, path name, target entity, jaccard distance, edit distance, vector similarity
    # print(relation_info)
    return relation_info
'''
def automata(seg_list):
    """Question-answering automaton over the knowledge graph (simple version).

    Consumes the segmented question word by word: accumulated words are first
    matched to entity names (START state), then to the matched entity's
    relations/properties via vector similarity, building scored paths.

    NOTE(review): this file contains a second ``automata`` definition with
    threshold_1 = 0.5 plus fallback strategies; one of the two shadows the
    other at import time — confirm which is live.  threshold_2/threshold_3
    are never referenced in THIS version.

    :param seg_list: list of segmented words of the question.
    :return: ``{'ents': entities, 'path': []}`` when only entities were
             recognized, otherwise ``{'ents': [best], 'path': best_path}``.
    """
    threshold_1 = 0.6  # state-transition threshold for vector-similarity matching
    threshold_2 = 0.15  # threshold for relation prediction (unused here)
    threshold_3 = 0.4  # threshold for text answer selection (unused here)
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: match accumulated words against entity names
            if (state['header'] is None):
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
            # non-START state
            else:
                if state['tailer'] is None:
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                else:
                    source = state['tailer']
                if source['neoId'] is None:
                    # neoId None marks a state that cannot transition further
                    continue
                if source['neoId'] not in caches:
                    # gather this entity's relations and properties into the cache
                    caches[source['neoId']] = []
                    relations = owlNeo4j.get_related_entities_by_id(
                        source['neoId'])
                    for relation in relations:
                        # add relations
                        caches[source['neoId']].append(relation)
                    props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                    for prop in props:
                        # add properties; skip if a same-named relation exists
                        if any(prop == relation['name']
                               for relation in caches[source['neoId']]):
                            continue
                        caches[source['neoId']].append({
                            'name': prop,
                            'target_label': '属性值',
                            'target_name': props[prop],
                            'target_neoId': None
                        })
                # similarity-match every relation/property; transition on scores
                # above the threshold
                link2state_map = {}
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        state['available_words'], list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        # first occurrence of this link name: transition and
                        # record the path taken
                        if link['name'] not in link2state_map:
                            new_path = [step for step in state['path']]
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_path.append([source, link['name'], target])
                            new_score = state['score'] * (1 + score - threshold_1)
                            new_states.append({
                                'header': state['header'],
                                'tailer': target,
                                'available_words': [],
                                'path': new_path,
                                'score': new_score
                            })
                            link2state_map[link['name']] = len(new_states) - 1
                        # a same-named link was already added: multi-valued
                        # relation (e.g. 知名校友) — append to the same state
                        else:
                            state_num = link2state_map[link['name']]
                            new_tailer = new_states[state_num]['tailer'].copy()
                            # multi-valued relation cannot transition further,
                            # so mark the tailer neoId as None
                            new_tailer['neoId'] = None
                            new_states[state_num]['tailer'] = new_tailer
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_states[state_num]['path'].append(
                                [source, link['name'], target])
        states += new_states
    # select the highest-scoring paths
    max_states = []
    for state in states:
        if (state['header'] is not None):
            if (max_states == []) or (state['score'] > max_states[0]['score']):
                max_states = [state]
            elif (state['score'] == max_states[0]['score']):
                if (state['score'] == 1) and (len(state['available_words']) < len(
                        max_states[0]['available_words'])
                                              ):  # among entity-only states prefer the longest entity match
                    max_states = [state]
                else:
                    max_states.append(state)
    # rank head entities by entity popularity
    entities = [
        state['header'] for state in max_states if state['header'] is not None
    ]
    entities = serviceKG.eneities_sort(entities)
    # only entities recognized -> return entity list, else the best path
    # NOTE(review): the other automata version tests score == 1 here; this one
    # tests score == 0 — confirm which comparison is correct.
    if (max_states == []) or (max_states[0]['score'] == 0):
        return {'ents': entities, 'path': []}
    else:
        paths = [
            state['path'] for state in max_states
            if state['header'] == entities[0]
        ]
        return {'ents': [entities[0]], 'path': paths[0]}
def entityLink(entity_list, question): # (通过实体名找到数据库中的各实体并通过评分策略找到中心实体) scores = [] allentity_info = [] for name in entity_list: simple_name = name if '_(' in name: simple_name = name[:name.find('_(')] elif '_(' in name: simple_name = name[:name.find('_(')] # print(4) name_simi_score = serviceWord2vec.get_similarity(list(jieba.cut(question)), list(jieba.cut(simple_name))) entity_total = ccksNeo.get_entity_list_by_name(name) # 指称的所有实体 # print(entity_total) in_question_word = 0 temp = question for j in simple_name: if j in question: temp = temp.replace(j, "") in_question_word = in_question_word + 1 temp = question for i in simple_name: if i in question: temp = temp.replace(i, "") # print("temp", temp) temp0 = temp # temp = question.replace(name, "") # 去掉指称的剩余句子 # print(temp) #剩余句子分词 for entity in entity_total: relation_list = [] entity_Id = entity['id'] # print(5) relations = ccksNeo.get_related_entities_by_id(entity['id']) # print(relations) max_relation_score = 0 relation_in_question = 0 for relation in relations: # 不同的关系,可能有类别相同的关系 relation_list.append(relation['name']) score = serviceWord2vec.get_similarity(list(jieba.cut(temp0)), list(jieba.cut(relation['name']))) # 只要实体关系和句子沾边 if score > max_relation_score: max_relation_score = score if relation['name'] in temp0: relation_in_question = 1 link_relation_num = len(relation_list) # relation_list_type = set(relation_list) # link_relation_type_num = len(relation_list_type) # print(question) if "《" + simple_name + "》" in question or "\"" + simple_name + "\"" in question or "“" + simple_name + "”" in question: be_included = 1 else: be_included = 0 relative_position = question.find(simple_name) / len(question) have_quesition_word = 0 # question_word_num = 0 min_distance = 100 for question_word in question_words: if question_word in question: have_quesition_word = 1 # question_word_num = question_word_num+1 if min_distance > abs(question.find(question_word) - question.find(simple_name)): min_distance = 
abs(question.find(question_word) - question.find(simple_name)) have_alpha_or_digit = 0 pattern1 = re.compile('[0-9]+') pattern2 = re.compile('[a-z]+') pattern3 = re.compile('[A-Z]+') match1 = pattern1.findall(simple_name) match2 = pattern2.findall(simple_name) match3 = pattern3.findall(simple_name) if match1 or match2 or match3: have_alpha_or_digit = 1 entity_length = len(simple_name) if simple_name in question: name_in_question = 1 else: name_in_question = 0 levenshtein_score = Levenshtein.distance(simple_name, question) ''' if name == c_name: is_correct_name =1 else: is_correct_name =0 if entity['keyId'] == c_keyid: is_correct_entity = 1 else: is_correct_entity = 0 print(q_id, entity_keyId, one_relation, link_relation_num, link_relation_type_num, be_included, relative_position, have_quesition_word, min_distance, have_alpha_or_digit, entity_length, is_correct_entity) sentence = q_id+' '+entity_keyId+' '+str(one_relation)+' '+str(link_relation_num)+' '+str(link_relation_type_num)+' '+str(be_included)+' '+str(relative_position)+' '+str(have_quesition_word)+' '+str(min_distance)+' '+str(have_alpha_or_digit)+' '+str(entity_length)+' '+str(is_correct_entity)+'\n' p = open("../NLPCC_KBQA/nlpcc-iccpol-2016.kbqa.training-data_processtry2.txt", 'a', encoding="utf-8") p.writelines(sentence) p.close() ''' entity_info = [name, entity_Id, name_simi_score, in_question_word, max_relation_score, relation_in_question, link_relation_num, be_included, relative_position, have_quesition_word, min_distance, have_alpha_or_digit, entity_length, name_in_question, levenshtein_score] allentity_info.append(entity_info) print(allentity_info) # time.sleep(10) return allentity_info '''
def automata(seg_list):
    """Question-answering automaton over the knowledge graph (full version).

    Consumes the segmented question word by word, matching entities (START
    state) and then relations/properties by vector similarity to build scored
    paths.  If no path is found it falls back to (1) a relation-prediction
    sub-service and (2) answer selection from the head entity's description
    text.

    NOTE(review): this file also contains a simpler ``automata`` with
    threshold_1 = 0.6; one of the two shadows the other — confirm which is
    live.

    :param seg_list: list of segmented words of the question.
    :return: ``{'ents': entities, 'path': []}`` when only entities were
             recognized, otherwise ``{'ents': [best], 'path': best_path}``.
    """
    threshold_1 = 0.5  # state-transition threshold for vector-similarity matching
    threshold_2 = 0.15  # threshold for the relation-prediction fallback
    threshold_3 = 0.4  # threshold for the text answer-selection fallback
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: match accumulated words against entity names
            if (state['header'] is None):
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
            # non-START state
            else:
                if state['tailer'] is None:
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                else:
                    source = state['tailer']
                if source['neoId'] is None:
                    # neoId None marks a state that cannot transition further
                    continue
                if source['neoId'] not in caches:
                    # gather this entity's relations and properties into the cache
                    caches[source['neoId']] = []
                    relations = owlNeo4j.get_related_entities_by_id(
                        source['neoId'])
                    for relation in relations:
                        # add relations
                        caches[source['neoId']].append(relation)
                    props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                    for prop in props:
                        # add properties; skip if a same-named relation exists
                        if any(prop == relation['name']
                               for relation in caches[source['neoId']]):
                            continue
                        caches[source['neoId']].append({
                            'name': prop,
                            'target_label': '属性值',
                            'target_name': props[prop],
                            'target_neoId': None
                        })
                # similarity-match every relation/property; transition on
                # scores above the threshold
                link2state_map = {}
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        state['available_words'], list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        # first occurrence of this link name: transition and
                        # record the path taken
                        if link['name'] not in link2state_map:
                            new_path = [step for step in state['path']]
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_path.append([source, link['name'], target])
                            new_score = state['score'] * (1 + score - threshold_1)
                            new_states.append({
                                'header': state['header'],
                                'tailer': target,
                                'available_words': [],
                                'path': new_path,
                                'score': new_score
                            })
                            link2state_map[link['name']] = len(new_states) - 1
                        # a same-named link was already added: multi-valued
                        # relation (e.g. 知名校友) — append to the same state
                        else:
                            state_num = link2state_map[link['name']]
                            new_tailer = new_states[state_num]['tailer'].copy()
                            # multi-valued relation cannot transition further,
                            # so mark the tailer neoId as None
                            new_tailer['neoId'] = None
                            new_states[state_num]['tailer'] = new_tailer
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_states[state_num]['path'].append(
                                [source, link['name'], target])
        states += new_states
    # Fallback 1: no path found — use the relation-prediction service
    if all(state['path'] == [] for state in states):
        relation_p = None
        for state in states:
            if (state['header'] is not None) and (state['available_words'] != []):
                source = {
                    'name': state['header']['name'],
                    'label': state['header']['label'],
                    'neoId': state['header']['neoId']
                }
                if relation_p is None:
                    question = '_' + ''.join(state['available_words'])
                    res = owlSubServers.relation_predict(question)
                    if res is None:
                        break
                    relation_p = res['answer']
                    point_predicted = res['point']
                    if point_predicted < threshold_2:
                        break
                # similarity-match cached links against the predicted relation
                # NOTE(review): assumes source['neoId'] is already present in
                # caches — confirm this cannot KeyError for states whose
                # entity was never expanded in the main loop.
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        list(jieba.cut(relation_p)), list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        new_path = [step for step in state['path']]
                        target = {
                            'name': link['target_name'],
                            'label': link['target_label'],
                            'neoId': link['target_neoId']
                        }
                        new_path.append([source, link['name'], target])
                        new_score = state['score'] * (1 + score - threshold_1)
                        states.append({
                            'header': state['header'],
                            'tailer': target,
                            'available_words': [],
                            'path': new_path,
                            'score': new_score
                        })
    # Fallback 2: still no path — pick an answer from the head entity's
    # description text
    if all(state['path'] == [] for state in states):
        for state in states:
            if (state['header'] is not None) and (state['available_words'] != []):
                description = state['header']['description']
                res = owlSubServers.answer_selection(str(''.join(seg_list)),
                                                     str(description))
                if res is None:
                    break
                answer = res['answer']
                point = float(res['point'])
                if point > threshold_3:
                    abstract = answer if len(
                        answer) < 10 else answer[:8] + '...'
                    new_path = [step for step in state['path']]
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                    target = {
                        'name': abstract,
                        'label': '实体描述文本',
                        'neoId': None,
                        'ans_from_desc': answer
                    }
                    new_path.append([source, 'description', target])
                    new_score = state['score'] + 0.00001
                    states.append({
                        'header': state['header'],
                        'tailer': target,
                        'available_words': [],
                        'path': new_path,
                        'score': new_score
                    })
    # select the highest-scoring paths
    max_states = []
    for state in states:
        if (state['header'] is not None):
            if (max_states == []) or (state['score'] > max_states[0]['score']):
                max_states = [state]
            elif (state['score'] == max_states[0]['score']):
                if (state['score'] == 1) and (len(state['available_words']) < len(
                        max_states[0]['available_words'])
                                              ):  # among entity-only states prefer the longest entity match
                    max_states = [state]
                else:
                    max_states.append(state)
    # rank head entities by entity popularity
    entities = [
        state['header'] for state in max_states if state['header'] is not None
    ]
    entities = serviceKG.eneities_sort(entities)
    # only entities recognized -> return entity list, else the best path
    if (max_states == []) or (max_states[0]['score'] == 1):
        return {'ents': entities, 'path': []}
    else:
        paths = [
            state['path'] for state in max_states
            if state['header'] == entities[0]
        ]
        return {'ents': [entities[0]], 'path': paths[0]}
def knowledge_graph(question, neoid=None, autopick=False):
    """Route a question to the appropriate query/answer strategy.

    Dispatch order: direct entity lookup (``neoid`` given) → comparative
    questions ("A比B高/低") → category query (``c::``) → relation query
    (``r::``) → inductive questions → retrieval questions → flow/procedural
    questions → generic Chinese QA.

    :param question: the user's question text.
    :param neoid: if given, skip QA and decorate this entity directly.
    :param autopick: automatically pick the first entity when several match.
    :return: a ``decorate(...)`` payload, or None when QA produced nothing.
    """
    # An entity was already chosen: return its retrieval result directly.
    if neoid is not None:
        return decorate(neoid, style='BASIC')
    # NOTE(review): str.strip() returns a new string and this call discards
    # the result, so the question is NOT actually stripped — probably meant
    # ``question = question.strip()``.
    question.strip()
    if any(num in question for num in num_list):
        switch = True
    else:
        switch = False
    for queryword in queryword_list:
        if queryword in question:
            question = question.replace(queryword, '')
    # Comparative questions, e.g. "A比B高"
    pattern = r'^.+比.+(高|低).*$'
    # NOTE(review): the decode/encode round-trip only exists on Python 2
    # str — under Python 3 this raises AttributeError; confirm the target
    # interpreter (json.dumps(encoding=...) below is also Python-2-only).
    if re.search(pattern, question.decode('utf-8').encode('utf-8')) != None:
        seg_list = serviceQA.segment(question)
        seg_list_complete = []
        for seg in seg_list:
            seg_list_complete.append(seg.word)
        relatedwords = [u'利率', u'产品利率', u'存款利率', u'贷款利率']
        word_1, word_2 = '', ''
        # take the candidate word before "比" and the one after it
        for seg in seg_list_complete:
            if seg in namelist and seg_list_complete.index(
                    seg) < seg_list_complete.index('比'):
                word_1 = seg
                continue
            if seg in namelist and seg_list_complete.index(
                    seg) > seg_list_complete.index('比'):
                word_2 = seg
                break
        if len(owlNeo4j.get_entity_list_by_name(word_1)) > 0 and len(
                owlNeo4j.get_entity_list_by_name(word_2)) > 0:
            word_1 = owlNeo4j.get_entity_list_by_name(word_1)[0]
            word_2 = owlNeo4j.get_entity_list_by_name(word_2)[0]
            for word in relatedwords:
                if word in word_1 and word in word_2:
                    return decorate(data='1', style='COM', question=question)
    # Query by category
    if 'c::' in question:
        category = question.split('c::')[1].strip()
        for node in kb:
            for tag in node['taglist'].split(','):
                score = owlNeo4j.entity_similarity(category, tag)
                if category == tag or score >= 0.5:
                    return decorate('2', 'CAT', question=question)
    # Query by relation
    if 'r::' in question:
        relation = question.split('r::')[1].strip()
        if relation.find('<') == -1:
            for link in links:
                score = serviceWord2vec.get_similarity(
                    list(jieba.cut(relation)), list(jieba.cut(link['name'])))
                if relation == link['name'] or score >= 0.6:
                    return decorate('3', 'LIN', question=question)
        else:
            return decorate('3', 'LIN', question=question)
    # Inductive questions
    seg_list = serviceQA.segment(question)
    # seg_list_complete = []
    for seg in seg_list:
        # seg_list_complete.append(seg.word)
        if seg.word in [u'利率', u'产品利率', u'存款利率', u'贷款利率']:
            for seg in seg_list:
                if seg.word in catelist:
                    for seg in seg_list:
                        if seg.word in num_dict:
                            return decorate('4', 'IND', question=question)
    # Retrieval questions
    for seg in seg_list:
        if seg.word in [u'利率', u'产品利率', u'存款利率', u'贷款利率']:
            for seg in seg_list:
                if seg.word in catelist:
                    for seg in seg_list:
                        if seg.word in [u'高于', u'低于', u'等于']:
                            for seg in seg_list:
                                if seg.flag == 'm':
                                    return decorate('5', 'RET', question=question)
    # Flow/procedural questions
    pre = sequence_class.question_class(question)
    if pre == 1:
        result = serviceQA.autoseq(question)
        if result != 0:
            return decorate(result, style='QA')
    # Generic Chinese question answering
    qa_result = serviceQA.chinese_qa(question, switch)
    logging.info("qa_result:" + json.dumps(qa_result, encoding='utf-8', ensure_ascii=False))
    if (qa_result is None):
        return None
    # Entity-retrieval outcomes
    if 'question' in qa_result:  # a similar (entity, relation) question exists
        return decorate(qa_result['question'], style='QUE')
    if len(qa_result['path']) == 0:  # path empty: no relation found
        if autopick or (len(qa_result['ents']) == 1):  # auto-pick or a single entity
            return decorate(qa_result['ents'][0]['neoId'], style='BASIC')
        else:  # several entities and auto-pick disabled
            return decorate(qa_result['ents'], style='SNET')
    else:
        if qa_result['ents'][0]['neoId'] == None:
            return decorate(qa_result, style='TS')  # full-text retrieval
        # answer found in properties, or a matching (entity, property, entity)
        return decorate(qa_result, style='QA')
def entityLink(entity_list, question):
    """Link candidate names to KB entities and build scoring features.

    NOTE(review): this looks like a superseded version of the richer
    ``entityLink`` defined earlier in this file — it calls
    ``get_related_entities_by_neoid`` instead of
    ``get_related_entities_by_id`` and emits a shorter feature row.  Confirm
    which definition is live before modifying callers.

    :param entity_list: candidate entity names.
    :param question: the original question string.
    :return: one feature row per (name, KB entity) pair:
        [name, entity_Id, max_relation_score, link_relation_num,
         link_relation_type_num, be_included, relative_position,
         have_quesition_word, min_distance, have_alpha_or_digit,
         entity_length]
    """
    scores = []  # never used in this function
    allentity_info = []
    for name in entity_list:
        print(4)
        entity_total = ccksNeo.get_entity_list_by_name(name)  # all KB entities for this mention
        print(entity_total)
        temp = question.replace(name, "")  # question with the mention removed
        # print(temp)
        segmentor1 = Segmentor()
        segmentor1.load("./ltpdata/ltp_data_v3.4.0/cws.model")
        temp = list(segmentor1.segment(temp))  # segment the remainder
        # print(temp)
        segmentor1.release()
        for entity in entity_total:
            # id = q_id
            relation_list = []
            entity_Id = entity['id']
            # print("用id")
            print(5)
            relations = ccksNeo.get_related_entities_by_neoid(entity['id'])
            print(relations)
            max_relation_score = 0
            for relation in relations:  # distinct relations may share a type
                relation_list.append(relation['name'])
                '''
                segmentor2 = Segmentor()
                segmentor2.load("./ltpdata/ltp_data_v3.4.0/cws.model")
                temp2 = list(segmentor2.segment(relation['name']))
                segmentor2.release()
                '''
                # best similarity between any relation and the remainder
                score = serviceWord2vec.get_similarity(temp, list(jieba.cut(relation['name'])))
                # print(temp, temp2, score)
                if score > max_relation_score:
                    max_relation_score = score
            link_relation_num = len(relation_list)
            relation_list_type = set(relation_list)
            link_relation_type_num = len(relation_list_type)
            # print(question)
            # whether the mention appears quoted in the question
            if "《" + name + "》" in question or "\"" + name + "\"" in question or "“" + name + "”" in question:
                be_included = 1
            else:
                be_included = 0
            relative_position = question.find(name) / len(question)
            # distance between the mention and the nearest question word
            have_quesition_word = 0
            # question_word_num = 0
            min_distance = 100
            for question_word in question_words:
                if question_word in question:
                    have_quesition_word = 1
                    # question_word_num = question_word_num+1
                    if min_distance > abs(question.find(question_word) - question.find(name)):
                        min_distance = abs(question.find(question_word) - question.find(name))
            # whether the mention contains any digit or latin letter
            have_alpha_or_digit = 0
            pattern1 = re.compile('[0-9]+')
            pattern2 = re.compile('[a-z]+')
            pattern3 = re.compile('[A-Z]+')
            match1 = pattern1.findall(name)
            match2 = pattern2.findall(name)
            match3 = pattern3.findall(name)
            if match1 or match2 or match3:
                have_alpha_or_digit = 1
            entity_length = len(name)
            '''
            if name == c_name:
                is_correct_name =1
            else:
                is_correct_name =0
            if entity['keyId'] == c_keyid:
                is_correct_entity = 1
            else:
                is_correct_entity = 0
            print(q_id, entity_keyId, one_relation, link_relation_num, link_relation_type_num, be_included, relative_position, have_quesition_word, min_distance, have_alpha_or_digit, entity_length, is_correct_entity)
            sentence = q_id+' '+entity_keyId+' '+str(one_relation)+' '+str(link_relation_num)+' '+str(link_relation_type_num)+' '+str(be_included)+' '+str(relative_position)+' '+str(have_quesition_word)+' '+str(min_distance)+' '+str(have_alpha_or_digit)+' '+str(entity_length)+' '+str(is_correct_entity)+'\n'
            p = open("../NLPCC_KBQA/nlpcc-iccpol-2016.kbqa.training-data_processtry2.txt", 'a', encoding="utf-8")
            p.writelines(sentence)
            p.close()
            '''
            entity_info = [name, entity_Id, max_relation_score, link_relation_num, link_relation_type_num, be_included, relative_position, have_quesition_word, min_distance, have_alpha_or_digit, entity_length]
            allentity_info.append(entity_info)
    # print(allentity_info)
    return allentity_info
'''