def bloom(root, path=None):
    """Expand the neighbourhood of ``root`` into a node/link graph payload.

    Starting from the entity whose id is ``root``, neighbours are fetched
    breadth-first through ``owlNeo4j.get_related_entities_by_id``. A queued
    node is expanded only while it exists, fewer than 100 nodes have been
    collected so far, and its level (``value``) is below 3. The node limit is
    checked before each expansion, so one expansion may push the total past
    100.

    Args:
        root: id of the starting entity; ``None`` yields an empty payload.
        path: optional list of ``[source, relation, target]`` triples, where
            source and target are mappings with ``'neoId'``, ``'name'`` and
            ``'label'`` keys. The triples are merged into the graph as the
            answer path.

    Returns:
        A dict with the keys ``'nodes'``, ``'links'``, ``'answerpath'`` (node
        indices, always starting with the root index 0) and ``'answerlist'``
        (``path`` itself when one was given).
    """
    graph_result = {'nodes': [], 'links': [], 'answerpath': [], 'answerlist': []}
    if root is None:
        return graph_result

    nodes = graph_result['nodes']
    links = graph_result['links']
    answer_path = graph_result['answerpath']
    depth_limit = 3
    cursor = 0  # index of the next queued node whose neighbours get fetched

    # Seed the queue with the root node.
    root_info = owlNeo4j.get_entity_info_by_id(root)
    nodes.append({
        'id': 0,
        'name': root_info['name'],
        'category': root_info['label'],
        'neoId': root,
        'value': 0
    })

    # Breadth-first expansion; ``nodes`` doubles as the work queue.
    while (cursor < len(nodes) and len(nodes) < 100
           and nodes[cursor]['value'] < depth_limit):
        next_level = nodes[cursor]['value'] + 1
        for neighbour in owlNeo4j.get_related_entities_by_id(nodes[cursor]['neoId']):
            relation = neighbour['name']
            label = neighbour['target_label']
            name = neighbour['target_name']
            neoid = neighbour['target_neoId']

            # Reuse the first node that already carries this neoId, if any.
            target_index = next(
                (idx for idx, known in enumerate(nodes) if known['neoId'] == neoid),
                None)
            if target_index is None:
                target_index = len(nodes)
                nodes.append({
                    'id': target_index,
                    'name': name,
                    'category': label,
                    'neoId': neoid,
                    'value': next_level
                })
            # An edge is recorded for every neighbour, new node or not.
            links.append({
                'id': len(links),
                'source': cursor,
                'target': target_index,
                'level': next_level,
                'name': relation
            })
        cursor += 1

    answer_path.append(0)
    if path is None:
        return graph_result

    def last_index_of(neo_id):
        # Position of the last node carrying ``neo_id`` (None when absent).
        found = None
        for position, candidate in enumerate(nodes):
            if neo_id == candidate['neoId']:
                found = position
        return found

    # Merge the answer path into the graph.
    graph_result['answerlist'] = path
    for triple in path:
        head = triple[0]
        tail = triple[2]
        # Both lookups happen before any node is appended for this triple.
        i = last_index_of(head['neoId'])
        j = last_index_of(tail['neoId'])
        if i is None:
            i = len(nodes)
            nodes.append({
                'id': i,
                'name': head['name'],
                'neoId': head['neoId'],
                'value': 1,
                'category': head['label']
            })
        if j is None:
            j = len(nodes)
            nodes.append({
                'id': j,
                'name': tail['name'],
                'neoId': tail['neoId'],
                'value': 1,
                'category': tail['label']
            })
        links.append({
            'id': len(links),
            'source': i,
            'target': j,
            'name': triple[1]
        })
        for endpoint in (i, j):
            if endpoint not in answer_path:
                answer_path.append(endpoint)
    return graph_result
def entity_search(name=None, neoid=None, autopick=False):
    """Look up an entity by id, or the entities sharing a name.

    Args:
        name: entity name used when ``neoid`` is not given.
        neoid: entity id; when not ``None`` it takes precedence over ``name``.
        autopick: when truthy, return only the first ranked match of a name
            search instead of the whole list.

    Returns:
        * id search: whatever ``owlNeo4j.get_entity_info_by_id`` returns.
        * name search with no match: an empty list.
        * name search with exactly one match, or ``autopick`` set: the first
          ranked entity.
        * otherwise: the full ranked list from ``eneities_sort``.
    """
    # An id lookup short-circuits everything else.
    if neoid is not None:
        return owlNeo4j.get_entity_info_by_id(neoid)

    # Name lookup: fetch every same-named entity, then rank them.
    ranked = eneities_sort(owlNeo4j.get_entity_list_by_name(name))
    match_count = len(ranked)
    if match_count == 0:
        return []
    if autopick or match_count == 1:
        return ranked[0]
    return ranked
def entity(request):
    """Return the information of one entity as a JSON HTTP response.

    The entity id is read from the ``id`` query parameter and converted to an
    integer before being passed to ``owlNeo4j.get_entity_info_by_id``.

    Args:
        request: the incoming request; only ``request.GET`` is read.

    Returns:
        An ``HttpResponse`` whose body is the JSON-encoded entity information
        (non-ASCII characters are emitted as-is). When ``id`` is missing or is
        not an integer, a JSON error body with status 400 is returned instead
        of letting ``int()`` raise. Both responses carry the
        ``Access-Control-Allow-Origin: *`` header.
    """
    neoid = request.GET.get('id', None)
    # Disabled knowledge-base switching, kept from the original:
    # signal = request.GET.get('signal', None)
    # owlNeo4j.set_KB(signal)
    try:
        # int(None) raises TypeError, int('abc') raises ValueError.
        entity_id = int(neoid)
    except (TypeError, ValueError):
        response = HttpResponse(
            json.dumps({'error': 'missing or invalid id'}, ensure_ascii=False),
            status=400)
    else:
        response = HttpResponse(
            json.dumps(owlNeo4j.get_entity_info_by_id(entity_id),
                       ensure_ascii=False))
    response["Access-Control-Allow-Origin"] = "*"
    return response


# def test11(request):
#     s = request.GET.get('str')
#     res = {
#         "code": 0,
#         "len": len(s)
#     }
#
#     response = HttpResponse(json.dumps(res))
#
#     return response
def automata(seg_list):
    """Match a segmented question against the knowledge graph.

    The words of ``seg_list`` are consumed one at a time by a set of states.
    Each state records the recognised head entity (``header``), the entity
    reached so far (``tailer``), the words consumed since its last transition
    (``available_words``), the triples walked (``path``) and a ``score``.

    * The START state (no header) joins its accumulated words into a name and
      spawns one state with score 1 per entity returned by
      ``owlNeo4j.get_entity_list_by_name``.
    * Every other state compares its accumulated words with the name of each
      relation/property of its current entity using
      ``serviceWord2vec.get_similarity``; a similarity above the threshold
      spawns a new state whose score is multiplied by
      ``1 + similarity - threshold``.

    Args:
        seg_list: iterable of word strings (the segmented question).

    Returns:
        ``{'ents': [...], 'path': [...]}``. When no state with a header exists
        or the best states only recognised an entity, ``ents`` is the ranked
        entity list and ``path`` is empty. Otherwise ``ents`` holds the
        top-ranked entity and ``path`` is the list of
        ``[source, relation_name, target]`` triples of its best state.
    """
    threshold_1 = 0.6  # similarity a link name must exceed to trigger a transition
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}  # neoId -> list of relation/property links of that entity
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: try to recognise an entity from the words so far.
            if state['header'] is None:
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
            # Non-START state: try to follow a relation or property.
            else:
                if state['tailer'] is None:
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                else:
                    source = state['tailer']
                # A None neoId marks a state that cannot transition further.
                if source['neoId'] is None:
                    continue
                if source['neoId'] not in caches:
                    # Gather the entity's relations and properties once.
                    caches[source['neoId']] = []
                    relations = owlNeo4j.get_related_entities_by_id(
                        source['neoId'])
                    for relation in relations:
                        caches[source['neoId']].append(relation)
                    props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                    for prop in props:
                        # A property is skipped when a link of the same name
                        # is already cached.
                        if any(prop == relation['name']
                               for relation in caches[source['neoId']]):
                            continue
                        caches[source['neoId']].append({
                            'name': prop,
                            'target_label': '属性值',
                            'target_name': props[prop],
                            'target_neoId': None
                        })
                # Compare the pending words with every link name; transition
                # on each link whose similarity exceeds the threshold.
                link2state_map = {}  # link name -> index in new_states
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        state['available_words'],
                        list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        target = {
                            'name': link['target_name'],
                            'label': link['target_label'],
                            'neoId': link['target_neoId']
                        }
                        if link['name'] not in link2state_map:
                            # First link with this name: spawn a new state
                            # and record the step taken.
                            new_path = list(state['path'])
                            new_path.append([source, link['name'], target])
                            new_score = state['score'] * (
                                1 + score - threshold_1)
                            new_states.append({
                                'header': state['header'],
                                'tailer': target,
                                'available_words': [],
                                'path': new_path,
                                'score': new_score
                            })
                            link2state_map[link['name']] = len(new_states) - 1
                        else:
                            # Same link name seen again: treat it as a
                            # multi-valued relation and append the extra
                            # triple to the state already spawned for it.
                            state_num = link2state_map[link['name']]
                            # Copy before clearing neoId so the target dict
                            # stored in the path keeps its id; the None neoId
                            # stops this state from transitioning further.
                            new_tailer = new_states[state_num]['tailer'].copy()
                            new_tailer['neoId'] = None
                            new_states[state_num]['tailer'] = new_tailer
                            new_states[state_num]['path'].append(
                                [source, link['name'], target])
        states += new_states
    # Keep the states that share the highest score.
    max_states = []
    for state in states:
        if state['header'] is not None:
            if (max_states == []) or (state['score'] > max_states[0]['score']):
                max_states = [state]
            elif state['score'] == max_states[0]['score']:
                if (state['score'] == 1) and (
                        len(state['available_words']) <
                        len(max_states[0]['available_words'])):
                    # Among entity-only states, fewer leftover words means a
                    # longer entity match, which is preferred.
                    max_states = [state]
                else:
                    max_states.append(state)
    # Rank the head entities of the best states.
    entities = [
        state['header'] for state in max_states
        if state['header'] is not None
    ]
    entities = serviceKG.eneities_sort(entities)
    # Entity-only states carry a score of exactly 1 (each transition
    # multiplies the score by a factor above 1), so a best score of 1 means
    # no relation was matched and the entity list is returned.
    if (max_states == []) or (max_states[0]['score'] == 1):
        return {'ents': entities, 'path': []}
    paths = [
        state['path'] for state in max_states
        if state['header'] == entities[0]
    ]
    return {'ents': [entities[0]], 'path': paths[0]}
def automata(seg_list):
    """Match a segmented question against the knowledge graph.

    The words of ``seg_list`` are consumed one at a time by a set of states.
    Each state records the recognised head entity (``header``), the entity
    reached so far (``tailer``), the words consumed since its last transition
    (``available_words``), the triples walked (``path``) and a ``score``.

    Three stages run in order; each later stage runs only if no state has a
    non-empty path yet:

    1. Word-by-word matching of link names via
       ``serviceWord2vec.get_similarity``.
    2. Relation prediction via ``owlSubServers.relation_predict``, whose
       predicted relation is matched against the cached link names.
    3. Answer selection from the head entity's ``description`` via
       ``owlSubServers.answer_selection``.

    Args:
        seg_list: iterable of word strings (the segmented question).

    Returns:
        ``{'ents': [...], 'path': [...]}``. When no state with a header exists
        or the best score is exactly 1 (entity only), ``ents`` is the ranked
        entity list and ``path`` is empty. Otherwise ``ents`` holds the
        top-ranked entity and ``path`` is the triple list of its best state.
    """
    threshold_1 = 0.5  # state-transition threshold for vector-similarity matching
    threshold_2 = 0.15  # state-transition threshold for relation-prediction matching
    threshold_3 = 0.4  # state-transition threshold for text answer-selection matching
    states = [{
        'header': None,
        'tailer': None,
        'available_words': [],
        'path': [],
        'score': 0
    }]
    caches = {}  # neoId -> list of relation/property links of that entity
    for word in seg_list:
        new_states = []
        for state in states:
            state['available_words'].append(word)
            # START state: try to recognise an entity from the words so far
            if (state['header'] is None):
                entity_name = "".join(state['available_words'])
                same_name_entity_list = owlNeo4j.get_entity_list_by_name(
                    entity_name)
                for entity in same_name_entity_list:
                    new_states.append({
                        'header': entity,
                        'tailer': None,
                        'available_words': [],
                        'path': [],
                        'score': 1
                    })
            # Non-START state: try to follow a relation or property
            else:
                if state['tailer'] is None:
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                else:
                    source = state['tailer']
                if source['neoId'] is None:  # a None neoId means the path reached a state that cannot transition
                    continue
                if source['neoId'] not in caches:  # gather this entity's relations and properties into the cache for later use
                    caches[source['neoId']] = []
                    relations = owlNeo4j.get_related_entities_by_id(
                        source['neoId'])
                    for relation in relations:  # add relations
                        caches[source['neoId']].append(relation)
                    props = owlNeo4j.get_entity_info_by_id(source['neoId'])
                    for prop in props:  # add properties; skip one when a link of the same name is already cached
                        if any(prop == relation['name']
                               for relation in caches[source['neoId']]):
                            continue
                        caches[source['neoId']].append({
                            'name': prop,
                            'target_label': '属性值',
                            'target_name': props[prop],
                            'target_neoId': None
                        })
                # Compare the pending words with every relation/property name;
                # transition whenever the similarity exceeds the threshold
                link2state_map = {}  # link name -> index of its state in new_states
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        state['available_words'],
                        list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        # First link with this name: transition directly and record the step taken
                        if link['name'] not in link2state_map:
                            new_path = [step for step in state['path']]
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_path.append([source, link['name'], target])
                            new_score = state['score'] * (1 + score - threshold_1)
                            new_states.append({
                                'header': state['header'],
                                'tailer': target,
                                'available_words': [],
                                'path': new_path,
                                'score': new_score
                            })
                            link2state_map[link['name']] = len(new_states) - 1
                        # A link of the same name was already added, so the relation is
                        # multi-valued (e.g. notable alumni): append this triple to that state
                        else:
                            state_num = link2state_map[link['name']]
                            # Copy first so the target dict stored in the path keeps its neoId
                            new_tailer = new_states[state_num]['tailer'].copy()
                            new_tailer[
                                'neoId'] = None  # a multi-valued relation cannot transition further, so the tailer neoId is set to None
                            new_states[state_num]['tailer'] = new_tailer
                            target = {
                                'name': link['target_name'],
                                'label': link['target_label'],
                                'neoId': link['target_neoId']
                            }
                            new_states[state_num]['path'].append(
                                [source, link['name'], target])
        states += new_states
    # If no answer was found, fall back to relation prediction
    if all(state['path'] == [] for state in states):
        relation_p = None
        # NOTE(review): ``states`` is appended to while being iterated; the
        # appended states have empty ``available_words`` and are therefore
        # skipped by the guard below.
        for state in states:
            if (state['header'] is not None) and (state['available_words'] != []):
                source = {
                    'name': state['header']['name'],
                    'label': state['header']['label'],
                    'neoId': state['header']['neoId']
                }
                # The prediction service is queried once; its answer is reused
                # for every later state.
                if relation_p is None:
                    question = '_' + ''.join(state['available_words'])
                    res = owlSubServers.relation_predict(question)
                    if res is None:
                        break
                    relation_p = res['answer']
                    point_predicted = res['point']
                    if point_predicted < threshold_2:
                        break
                # Compare the predicted relation with every relation/property name;
                # transition whenever the similarity exceeds the threshold
                # NOTE(review): assumes the header's links were cached during
                # the word loop above (a header with a None neoId would not
                # be) - TODO confirm.
                for link in caches[source['neoId']]:
                    score = serviceWord2vec.get_similarity(
                        list(jieba.cut(relation_p)),
                        list(jieba.cut(link['name'])))
                    if score > threshold_1:
                        new_path = [step for step in state['path']]
                        target = {
                            'name': link['target_name'],
                            'label': link['target_label'],
                            'neoId': link['target_neoId']
                        }
                        new_path.append([source, link['name'], target])
                        new_score = state['score'] * (1 + score - threshold_1)
                        states.append({
                            'header': state['header'],
                            'tailer': target,
                            'available_words': [],
                            'path': new_path,
                            'score': new_score
                        })
    # Still no path: for states with a head entity, take the entity's
    # description and select an answer from that text
    if all(state['path'] == [] for state in states):
        for state in states:
            if (state['header'] is not None) and (state['available_words'] != []):
                description = state['header']['description']
                res = owlSubServers.answer_selection(str(''.join(seg_list)),
                                                     str(description))
                if res is None:
                    break
                answer = res['answer']
                point = float(res['point'])
                if point > threshold_3:
                    # Answers of 10 or more characters are shortened to 8 characters plus '...'
                    abstract = answer if len(
                        answer) < 10 else answer[:8] + '...'
                    new_path = [step for step in state['path']]
                    source = {
                        'name': state['header']['name'],
                        'label': state['header']['label'],
                        'neoId': state['header']['neoId']
                    }
                    target = {
                        'name': abstract,
                        'label': '实体描述文本',
                        'neoId': None,
                        'ans_from_desc': answer
                    }
                    new_path.append([source, 'description', target])
                    # The small increment lifts the score just above the
                    # entity-only score of 1
                    new_score = state['score'] + 0.00001
                    states.append({
                        'header': state['header'],
                        'tailer': target,
                        'available_words': [],
                        'path': new_path,
                        'score': new_score
                    })
    # Keep the states that share the highest score
    max_states = []
    for state in states:
        if (state['header'] is not None):
            if (max_states == []) or (state['score'] > max_states[0]['score']):
                max_states = [state]
            elif (state['score'] == max_states[0]['score']):
                if (state['score'] == 1) and (len(state['available_words']) < len(
                        max_states[0]['available_words'])
                ):  # among entity-only states, prefer the longest entity match (fewest leftover words)
                    max_states = [state]
                else:
                    max_states.append(state)
    # Rank the head entities of the best states (by entity popularity, per
    # the original comment; the criterion lives in serviceKG.eneities_sort)
    entities = [
        state['header'] for state in max_states if state['header'] is not None
    ]
    entities = serviceKG.eneities_sort(entities)
    # If only entities were recognised return the entity list, otherwise the best path
    if (max_states == []) or (max_states[0]['score'] == 1):
        return {'ents': entities, 'path': []}
    else:
        paths = [
            state['path'] for state in max_states
            if state['header'] == entities[0]
        ]
        return {'ents': [entities[0]], 'path': paths[0]}
def bloom(root, path=None, question=None):
    """Build the graph payload for a question, or delegate special queries.

    ``root`` is either an entity id or one of the string codes ``'1'`` to
    ``'6'`` (strings, not numbers; the number 1 is a valid entity id):

    * ``'1'`` comparison question -> ``serviceQA.autocom``
    * ``'2'`` query by category -> ``serviceQA.autocate``
    * ``'3'`` query by relation -> ``serviceQA.autolink`` (always returned)
    * ``'4'`` induction question -> ``serviceQA.autoinduce``
    * ``'5'`` retrieval question -> ``serviceQA.autoret``
    * ``'6'`` full-text retrieval: a single-node payload built from ``path``

    For codes 1, 2, 4 and 5 the handler result is returned unless it equals 0,
    in which case execution continues with the regular expansion below.

    The regular expansion starts at the entity ``root`` and collects
    neighbours breadth-first via
    ``owlNeo4j.get_twoway_related_entities_by_id`` (at most 9 requested per
    node). A queued node is expanded only while fewer than 100 nodes have been
    collected and its level (``value``) is below 2.

    Args:
        root: entity id, one of the codes above, or ``None``.
        path: list of ``[source, relation, target]`` triples merged in as the
            answer path; for code ``'6'`` a three-item sequence whose first
            item is a mapping with ``'name'``, ``'category'`` and ``'问题'``.
        question: question text forwarded to the ``serviceQA`` handlers.

    Returns:
        A handler result, or a dict with the keys ``'nodes'``, ``'links'``,
        ``'answerpath'`` and ``'answerlist'``.
    """
    if root == '3':
        return serviceQA.autolink(question)

    # Handlers that signal "no result" with 0; 0 falls through to the
    # regular expansion.
    special_result = 0
    if root == '1':
        special_result = serviceQA.autocom(question)
    elif root == '2':
        special_result = serviceQA.autocate(question)
    elif root == '4':
        special_result = serviceQA.autoinduce(question)
    elif root == '5':
        special_result = serviceQA.autoret(question)
    if special_result != 0:
        return special_result

    if root == '6':
        # All three items are read, as in the original, although only the
        # source is used.
        source, relation, target = path[0], path[1], path[2]
        result = {'nodes': [], 'links': [], 'answerpath': [], 'answerlist': []}
        # The answer node and its edge are disabled in the original:
        # new_node2 = {'id': 1, 'name': target['name'], 'category': target['category'], 'neoId': None, 'content': target['ans_from_desc'], 'value': 1}
        # new_edge1 = {'id': 0, 'name': relation, 'level': 1, 'source': 0, 'target': 1}
        result['nodes'].append({
            'id': 0,
            'name': source['name'],
            'category': source['category'],
            'neoId': None,
            'content': source['问题'],
            'value': 0
        })
        return result

    graph_result = {'nodes': [], 'links': [], 'answerpath': [], 'answerlist': []}
    if root is None:
        return graph_result

    nodes = graph_result['nodes']
    links = graph_result['links']
    answer_path = graph_result['answerpath']
    depth_limit = 2
    neighbour_cap = 9  # original note: "if pointer != 0 else None"
    cursor = 0  # index into ``nodes`` of the next node to expand

    # Seed the queue with the root node.
    root_info = owlNeo4j.get_entity_info_by_id(root)
    nodes.append({
        'id': 0,
        'name': root_info['name'],
        'category': root_info['category'],
        'neoId': root,
        'value': 0
    })

    # Breadth-first expansion; ``nodes`` doubles as the work queue.
    while (cursor < len(nodes) and len(nodes) < 100
           and nodes[cursor]['value'] < depth_limit):
        neighbours = owlNeo4j.get_twoway_related_entities_by_id(
            nodes[cursor]['neoId'], max_num=neighbour_cap)
        for neighbour in neighbours:
            relation = neighbour['name']
            category = neighbour['target_category']
            name = neighbour['target_name']
            neoid = neighbour['target_neoId']

            # Reuse the id of the first node with this neoId, else add a node.
            id_target = next(
                (known['id'] for known in nodes if known['neoId'] == neoid),
                None)
            if id_target is None:
                id_target = len(nodes)
                nodes.append({
                    'id': id_target,
                    'name': name,
                    'category': category,
                    'neoId': neoid,
                    'value': nodes[cursor]['value'] + 1
                })

            # Only one link is kept per node pair, whatever its direction.
            already_linked = False
            for link in links:
                ends = (link['source'], link['target'])
                if ends == (cursor, id_target) or ends == (id_target, cursor):
                    already_linked = True
                    break
            if already_linked:
                continue

            # ``positive`` True means the relation points from the current
            # node to the neighbour; False means the reverse.
            if neighbour['positive']:
                edge_source, edge_target = cursor, id_target
            else:
                edge_source, edge_target = id_target, cursor
            links.append({
                'id': len(links),
                'name': relation,
                'level': nodes[cursor]['value'] + 1,
                'source': edge_source,
                'target': edge_target
            })
        cursor += 1

    answer_path.append(0)
    if path is None:
        return graph_result

    def last_index_of(neo_id):
        # Position of the last node carrying ``neo_id`` (None when absent).
        found = None
        for position, candidate in enumerate(nodes):
            if neo_id == candidate['neoId']:
                found = position
        return found

    # Merge the answer path into the graph.
    graph_result['answerlist'] = path
    for triple in path:
        head = triple[0]
        tail = triple[2]
        # Both lookups happen before any node is appended for this triple.
        i = last_index_of(head['neoId'])
        j = last_index_of(tail['neoId'])
        if i is None:
            i = len(nodes)
            nodes.append({
                'id': i,
                'name': head['name'],
                'neoId': head['neoId'],
                'value': 1,
                'category': head['category']
            })
        if j is None:
            j = len(nodes)
            nodes.append({
                'id': j,
                'name': tail['name'],
                'neoId': tail['neoId'],
                'value': 1,
                'category': tail['category']
            })
        links.append({
            'id': len(links),
            'source': i,
            'target': j,
            'name': triple[1]
        })
        for endpoint in (i, j):
            if endpoint not in answer_path:
                answer_path.append(endpoint)
    return graph_result