def __init__(self, query_data, dependency=None):
    """Build a connected query graph from recognised entities and relations.

    :param query_data: dict with an ``'intent'`` entry and optional
        ``'relation'`` / ``'entity'`` lists (missing lists are created
        in place via ``setdefault``).
    :param dependency: optional dependency-analysis result; when given and
        usable it is turned into the query graph directly.
    """
    logger.info('Query Parsing...')
    self.relation = query_data.setdefault('relation', list())
    self.entity = query_data.setdefault('entity', list())
    self.intent = query_data['intent']
    self.dependency = dependency

    self.relation_component_list = list()
    self.entity_component_list = list()
    # Build one sub-graph component per relation and per entity.
    self.init_relation_component()
    self.init_entity_component()

    # If dependency analysis is available, try to derive the graph from it.
    if self.dependency and len(self.dependency) > 0:
        logger.info('dependency exist.')
        # Use the argument that was just checked; query_data may not carry
        # a 'dependency' key at all.
        dm = DepMap(self.dependency, self.relation_component_list,
                    self.entity_component_list)
        if dm.check_dep():
            # The dependency graph is only usable when it is connected.
            if nx.algorithms.is_weakly_connected(dm.dep_graph):
                self.query_graph = dm.dep_graph
                self.determine_intention()
                return
        else:
            logger.info('dependency wrong!')

    # Union of all components, represented as a single graph.
    self.component_graph = nx.disjoint_union_all(
        self.relation_component_list + self.entity_component_list)
    self.query_graph = Graph(copy.deepcopy(self.component_graph))
    self.old_query_graph = copy.deepcopy(self.component_graph)
    self.node_type_dict = self.query_graph.node_type_statistic()
    self.component_assemble()
    # Keep merging while the previous round removed a node (node count
    # changed) and the graph is still not connected.
    while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
            and not nx.algorithms.is_weakly_connected(self.query_graph):
        self.old_query_graph = copy.deepcopy(self.query_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

    # Still disconnected: bridge components with default edges.
    while not nx.algorithms.is_weakly_connected(self.query_graph):
        if not self.add_default_edge():
            # Nothing could be added, so a default edge is missing.
            logger.info('default edge missing!')
            break

    # The graph is assembled; now decide the query intention.
    self.determine_intention()
def __init__(self, graph, query):
    """Convert a query graph into the query-interface structures.

    :param graph: query graph; its nodes are relabelled to integers first.
    :param query: the original query, stored and echoed in ``query_dict``.
    """
    self.graph = Graph(nx.convert_node_labels_to_integers(graph))
    self.query = query
    # Marks the case where the query intention is a "belong" attribute.
    self.intention_tail = ''

    # Each container is created right before the step that fills it.
    self.entities = {}
    self.init_entities()

    self.rels = []
    self.init_rels()

    self.intentions = []
    self.init_intention()

    self.query_dict = {}

    # Post-processing, in this exact order.
    self.serial_process()
    self.add_rels_to_entities()
    self.final_delete()
    self.init_query_dict()
def init_dep_graph(self):
    """Build ``self.dep_graph`` from the dependency items.

    Every entity->relation or relation->entity item contributes one
    sub-graph to ``self.dep_graph_list``; other combinations are ignored.
    The sub-graphs are then merged and the nodes numbered by position.
    """
    for dep in self.dependency:
        source = dep['from']
        target = dep['to']
        if source['type'] == 'entity' and target['type'] == 'relation':
            piece = self.from_ent_to_rel(source['value'], target['value'])
            self.dep_graph_list.append(piece)
        elif source['type'] == 'relation' and target['type'] == 'entity':
            piece = self.from_rel_to_ent(source['value'], target['value'])
            self.dep_graph_list.append(piece)

    self.dep_graph = nx.disjoint_union_all(self.dep_graph_list)
    # Relabel in place so node ids equal their iteration position.
    position_of = {
        node: index for index, node in enumerate(self.dep_graph.nodes)
    }
    nx.relabel_nodes(self.dep_graph, position_of, copy=False)
    self.dep_graph = Graph(self.dep_graph)
def add_default_edge(self):
    """Try to join the first two connected components with a default edge.

    :return: True when an edge was added, False otherwise.
    """
    components = self.query_graph.get_connected_components_subgraph()
    first_types = Graph(components[0]).node_type_statistic()
    second_types = Graph(components[1]).node_type_statistic()
    type_pairs = list(
        itertools.product(first_types.keys(), second_types.keys()))

    for edge_name, edge in DEFAULT_EDGE.items():
        for left_type, right_type in type_pairs:
            if left_type == edge['domain'] and right_type == edge['range']:
                # Edge runs from the first component to the second.
                source = first_types[edge['domain']][0]
                target = second_types[edge['range']][0]
            elif right_type == edge['domain'] and left_type == edge['range']:
                # Edge runs from the second component to the first.
                source = second_types[edge['domain']][0]
                target = first_types[edge['range']][0]
            else:
                continue
            self.query_graph.add_edge(source, target, edge_name)
            return True
    return False
def add_default_edge_between_components(self, components_set, c1, c2):
    """Add a default edge between two connected components.

    :param components_set: indexable collection of component sub-graphs
    :param c1: index of the first component
    :param c2: index of the second component
    :return: True when an edge was added, False otherwise
    """
    first_types = Graph(components_set[c1]).node_type_statistic()
    second_types = Graph(components_set[c2]).node_type_statistic()
    type_pairs = list(
        itertools.product(first_types.keys(), second_types.keys()))
    # Always take the first node of the matching type.
    pick = 0

    for edge_name, edge in DEFAULT_EDGE.items():
        for left_type, right_type in type_pairs:
            if left_type == edge['domain'] and right_type == edge['range']:
                source = first_types[edge['domain']][pick]
                target = second_types[edge['range']][pick]
            elif right_type == edge['domain'] and left_type == edge['range']:
                source = second_types[edge['domain']][pick]
                target = first_types[edge['range']][pick]
            else:
                continue
            self.query_graph.add_edge(source, target, edge_name,
                                      type=edge_name, value=edge['value'])
            return True
    return False
class QueryParser:
    """Assemble a query graph from recognised entities and relations and
    mark the node that represents the query intention."""

    def __init__(self, query_data, dependency=None):
        """Build the query graph.

        :param query_data: dict with an ``'intent'`` entry and optional
            ``'relation'`` / ``'entity'`` lists (created when missing).
        :param dependency: optional dependency-analysis result.
        """
        logger.info('Query Parsing...')
        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())
        self.intent = query_data['intent']
        self.dependency = dependency

        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build one sub-graph component per relation and per entity.
        self.init_relation_component()
        self.init_entity_component()

        # If dependency analysis is available, try to use it directly.
        if self.dependency and len(self.dependency) > 0:
            logger.info('dependency exist.')
            # Use the argument that was just checked; query_data may not
            # contain a 'dependency' key.
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if dm.check_dep():
                if nx.algorithms.is_weakly_connected(dm.dep_graph):
                    self.query_graph = dm.dep_graph
                    self.determine_intention()
                    return
            else:
                logger.info('dependency wrong!')

        # Union of all components, represented as a single graph.
        self.component_graph = nx.disjoint_union_all(
            self.relation_component_list + self.entity_component_list)
        self.query_graph = Graph(copy.deepcopy(self.component_graph))
        self.old_query_graph = copy.deepcopy(self.component_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()
        # Keep merging while the last round removed a node and the graph
        # is still not connected.
        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()

        # Still disconnected: bridge components with default edges.
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            if not self.add_default_edge():
                # Nothing could be added, so a default edge is missing.
                logger.info('default edge missing!')
                break

        self.determine_intention()

    def add_default_edge(self):
        """Try to join the first two connected components.

        :return: True when a default edge was added, False otherwise.
        """
        components_set = self.query_graph.get_connected_components_subgraph()
        d0 = Graph(components_set[0]).node_type_statistic()
        d1 = Graph(components_set[1]).node_type_statistic()
        candidates = list(itertools.product(d0.keys(), d1.keys()))
        for key, edge in DEFAULT_EDGE.items():
            for c in candidates:
                if c[0] == edge['domain'] and c[1] == edge['range']:
                    node_0 = d0[edge['domain']][0]
                    node_1 = d1[edge['range']][0]
                elif c[1] == edge['domain'] and c[0] == edge['range']:
                    node_0 = d1[edge['domain']][0]
                    node_1 = d0[edge['range']][0]
                else:
                    continue
                self.query_graph.add_edge(node_0, node_1, key)
                return True
        return False

    def determine_intention_by_type(self):
        """Mark the first concept node whose type equals ``self.intent``
        (case 1.2 of :meth:`determine_intention`)."""
        for n in self.query_graph.nodes:
            attrs = self.query_graph.nodes[n]
            if attrs['label'] == 'concept' and attrs['type'] == self.intent:
                self.add_intention_on_node(n)
                break

    def add_intention_on_node(self, node):
        """Flag ``node`` as the intention node."""
        self.query_graph.nodes[node]['intent'] = True

    def add_intention_on_nodes(self, node_list):
        """Flag one node of ``node_list``, preferring a ``Person`` node.

        :param node_list: non-empty list of candidate node ids
        """
        for node in node_list:
            if self.query_graph.nodes[node]['type'] == 'Person':
                self.query_graph.nodes[node]['intent'] = True
                return
        # No Person node found: fall back to the first candidate.
        self.query_graph.nodes[node_list[0]]['intent'] = True

    def is_none_node(self, node):
        """Return True for a concept node with no literal neighbour."""
        current_graph = self.query_graph
        if current_graph.nodes[node]['label'] != 'concept':
            return False
        # A node counts as "empty" when none of its neighbours is a literal.
        for n in current_graph.neighbors(node):
            if current_graph.nodes[n]['label'] == 'literal':
                return False
        return True

    def get_none_nodes(self):
        """Return all nodes for which :meth:`is_none_node` is True."""
        return [
            node for node in self.query_graph.nodes
            if self.is_none_node(node)
        ]

    def determine_intention(self):
        """Decide which node carries the query intention.

        1. An intent type was recognised: the first concept node of that
           type is the intention.
        2. No intent type: pick among the empty concept nodes, or among
           all nodes when there is none, preferring a Person node and
           falling back to the first candidate.
        """
        if self.intent:
            self.determine_intention_by_type()
            return
        none_nodes = self.get_none_nodes()
        if len(none_nodes) > 0:
            self.add_intention_on_nodes(none_nodes)
        else:
            self.add_intention_on_nodes(list(self.query_graph.nodes))

    def component_assemble(self):
        """For each node type, merge the first pair of same-typed nodes
        that are not yet connected."""
        for k, v in self.node_type_dict.items():
            if len(v) < 2:
                continue
            for pair in itertools.combinations(v, 2):
                test_graph = nx.to_undirected(self.query_graph)
                if nx.has_path(test_graph, pair[0], pair[1]):
                    # Already connected: leave this pair alone.
                    continue
                nx.relabel_nodes(self.query_graph, {pair[0]: pair[1]},
                                 copy=False)
                break

    def init_entity_component(self):
        """Create one graph component per recognised entity."""
        for e in self.entity:
            component = QueryGraphComponent(e)
            self.entity_component_list.append(nx.MultiDiGraph(component))

    def init_relation_component(self):
        """Create a two-node component for every known relation."""
        for r in self.relation:
            if r['type'] not in RELATION_DATA.keys():
                continue
            relation_component = nx.MultiDiGraph()
            relation_component.add_edge('temp_0', 'temp_1', r['type'], **r)
            for n in relation_component.nodes:
                relation_component.nodes[n]['label'] = 'concept'
            relation_component.nodes['temp_0']['type'] = RELATION_DATA[
                r['type']]['domain']
            relation_component.nodes['temp_1']['type'] = RELATION_DATA[
                r['type']]['range']
            self.relation_component_list.append(relation_component)
def __init__(self, query_data, dependency=None):
    """Build the query graph and mark its intention node.

    On failure ``self.error_info`` holds a message and construction stops
    early; otherwise it stays ``None``.

    :param query_data: dict with an ``'intent'`` entry and optional
        ``'relation'`` / ``'entity'`` lists (created when missing).
    :param dependency: optional dependency-analysis result.
    """
    logger.info('Query Graph Parsing...')
    self.error_info = None
    self.relation = query_data.setdefault('relation', [])
    self.entity = query_data.setdefault('entity', [])
    self.pre_process()
    self.intent = query_data['intent']
    self.dependency = dependency

    self.relation_component_list = []
    self.entity_component_list = []
    # Build the sub-graph components for relations and entities.
    self.init_relation_component()
    self.init_entity_component()

    # Dependency analysis, when present, is still evaluated here.
    if self.dependency and len(self.dependency) > 0:
        logger.info('dependency exist.')
        dm = DepMap(self.dependency, self.relation_component_list,
                    self.entity_component_list)
        dep_usable = (dm.check_dep() and dm.dep_graph
                      and nx.algorithms.is_weakly_connected(dm.dep_graph))
        if dep_usable:
            self.query_graph = dm.dep_graph
    # NOTE(review): the dependency-derived graph is discarded; the graph
    # is always rebuilt from the components below.
    self.query_graph = None

    # The order of the components determines the node merge order and
    # strongly affects the final graph.
    self.component_graph = my_disjoint_union_all(
        self.entity_component_list + self.relation_component_list)
    self.query_graph = Graph(copy.deepcopy(self.component_graph))
    self.old_query_graph = copy.deepcopy(self.component_graph)
    self.node_type_dict = self.query_graph.node_type_statistic()
    self.component_assemble()

    # Stop merging when the last round removed no node or the graph is
    # already connected.
    while (len(self.query_graph.nodes) != len(self.old_query_graph.nodes)
           and not nx.algorithms.is_weakly_connected(self.query_graph)):
        self.old_query_graph = copy.deepcopy(self.query_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

    if not self.query_graph:
        self.error_info = '问句缺失必要实体'
        return

    # Bridge the remaining components with default edges.
    while not nx.algorithms.is_weakly_connected(self.query_graph):
        added = self.add_default_edge()
        if added:
            continue
        # Nothing could be added, so a default edge is missing.
        logger.info('default edge missing!')
        logger.info('graph is not connected!')
        self.error_info = 'graph is not connected!'
        return

    # The graph is connected; renumber it and decide the intention.
    logger.info('connected graph is already')
    self.query_graph = Graph(
        nx.convert_node_labels_to_integers(self.query_graph))
    self.query_graph.show_log()
    logger.info('next is determine intention')
    self.determine_intention()
class QueryParser(object):
    """Dynamic question-answering graph semantic parsing module."""

    def __init__(self, query_data, dependency=None):
        """Build the query graph and mark its intention node.

        On failure ``self.error_info`` holds a message and construction
        stops early; otherwise it stays ``None``.

        :param query_data: dict with an ``'intent'`` entry and optional
            ``'relation'`` / ``'entity'`` lists (created when missing).
        :param dependency: optional dependency-analysis result.
        """
        logger.info('Query Graph Parsing...')
        self.error_info = None
        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())
        self.pre_process()
        self.intent = query_data['intent']
        self.dependency = dependency

        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build the sub-graph components for relations and entities.
        self.init_relation_component()
        self.init_entity_component()

        # Dependency analysis, when present, is still evaluated here.
        if self.dependency and len(self.dependency) > 0:
            logger.info('dependency exist.')
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if dm.check_dep() and dm.dep_graph \
                    and nx.algorithms.is_weakly_connected(dm.dep_graph):
                self.query_graph = dm.dep_graph
        # NOTE(review): the dependency-derived graph is discarded; the
        # graph is always rebuilt from the components below.
        self.query_graph = None

        # The order of the components determines the node merge order and
        # strongly affects the final graph.
        self.component_graph = my_disjoint_union_all(
            self.entity_component_list + self.relation_component_list)
        self.query_graph = copy.deepcopy(self.component_graph)
        self.query_graph = Graph(self.query_graph)
        self.old_query_graph = copy.deepcopy(self.component_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()
        # Stop merging when the last round removed no node or the graph
        # is already connected.
        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()

        if not self.query_graph:
            self.error_info = '问句缺失必要实体'
            return

        # Bridge the remaining components with default edges.
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            if not self.add_default_edge():
                # Nothing could be added, so a default edge is missing.
                logger.info('default edge missing!')
                logger.info('graph is not connected!')
                self.error_info = 'graph is not connected!'
                return

        # The graph is connected; renumber it and decide the intention.
        logger.info('connected graph is already')
        self.query_graph = nx.convert_node_labels_to_integers(self.query_graph)
        self.query_graph = Graph(self.query_graph)
        self.query_graph.show_log()
        logger.info('next is determine intention')
        self.determine_intention()

    def add_default_edge_between_components(self, components_set, c1, c2):
        """Add a default edge between two connected components.

        :param components_set: indexable collection of component sub-graphs
        :param c1: index of the first component
        :param c2: index of the second component
        :return: True when an edge was added, False otherwise
        """
        d0 = Graph(components_set[c1]).node_type_statistic()
        d1 = Graph(components_set[c2]).node_type_statistic()
        candidates = list(itertools.product(d0.keys(), d1.keys()))
        # Always take the first node of the matching type.
        trick_index = 0
        for key, edge in DEFAULT_EDGE.items():
            for c in candidates:
                if c[0] == edge['domain'] and c[1] == edge['range']:
                    node_0 = d0[edge['domain']][trick_index]
                    node_1 = d1[edge['range']][trick_index]
                elif c[1] == edge['domain'] and c[0] == edge['range']:
                    node_0 = d1[edge['domain']][trick_index]
                    node_1 = d0[edge['range']][trick_index]
                else:
                    continue
                self.query_graph.add_edge(node_0, node_1, key,
                                          type=key, value=edge['value'])
                return True
        return False

    def add_default_edge(self):
        """Add one default edge between two consecutive components.

        Only pairs ``(i, i + 1)`` are tried, so the last index paired is
        ``len - 2`` with ``len - 1``; iterating further would index past
        the end of the component list.

        :return: True when an edge was added, False otherwise
        """
        components_set = self.query_graph.get_connected_components_subgraph()
        for i in range(len(components_set) - 1):
            if self.add_default_edge_between_components(
                    components_set, i, i + 1):
                return True
        return False

    def determine_intention_by_type(self):
        """Mark the first concept node whose type equals ``self.intent``."""
        for n in self.query_graph.nodes:
            if self.query_graph.nodes[n]['label'] == 'concept':
                node_type = self.query_graph.nodes[n]['type']
                if node_type == self.intent:
                    self.add_intention_on_node(n)
                    break

    def add_intention_on_node(self, node):
        """Flag ``node`` as the intention node."""
        self.query_graph.nodes[node]['intent'] = True

    def add_intention_on_nodes(self, node_list):
        """Flag one node of ``node_list``, preferring a ``Person`` node.

        :param node_list: non-empty list of candidate (empty) node ids
        """
        for node in node_list:
            if self.query_graph.nodes[node]['type'] == 'Person':
                self.query_graph.nodes[node]['intent'] = True
                return
        # No Person node found: fall back to the first candidate.
        node = node_list[0]
        self.query_graph.nodes[node]['intent'] = True

    def get_intention_candidate(self):
        """Collect the ids of the candidate intention nodes.

        Starts from all concept nodes, keeps only those of the recognised
        intent type (if any), then narrows to empty nodes when the graph
        has some.

        :return: list of candidate node ids, or None after setting
            ``self.error_info`` when no candidate is left
        """
        logger.info('all concept node as intention candidates')
        intention_candidates = self.query_graph.get_concept_nodes()
        logger.info('intention candidates is %s' % str(intention_candidates))
        if self.intent:
            # The intent recogniser supplied an intent type.
            logger.info('intention type is %s' % self.intent)
            intention_candidates = [
                x for x in intention_candidates
                if self.query_graph.nodes[x].get('type') == self.intent
            ]
            logger.info('intention candidates is %s'
                        % str(intention_candidates))
        # Without any candidate there is nothing to rank later on, so
        # report the conflict instead of failing further down.
        if len(intention_candidates) == 0:
            logger.info('intention recognizer module produce wrong intention!')
            self.error_info = '意图冲突!'
            return
        none_nodes = self.query_graph.get_none_nodes(self.intent)
        if len(none_nodes) > 0:
            logger.info('the graph has %d blank node: %s' %
                        (len(none_nodes), str(none_nodes)))
            intention_candidates = [
                x for x in intention_candidates if x in none_nodes
            ]
            logger.info('intention candidates is %s'
                        % str(intention_candidates))
        return intention_candidates

    def determine_intention(self):
        """Choose and flag the intention node.

        1. The intent recogniser may provide an intent type from keywords.
        2. Candidates conflicting with that type are dropped.
        3. If empty nodes (nodes without a literal description) exist, only
           they remain candidates.
        4. Among the candidates, the node with the smallest value of
           ``query_graph.get_out_index`` is chosen; ties go to the first.
        """
        intention_candidates = self.get_intention_candidate()
        if self.error_info:
            return
        logger.info('determine intention by degree')
        criterion_dict = {
            node: self.query_graph.get_out_index(node)
            for node in intention_candidates
        }
        m = min(criterion_dict.values())
        # Several nodes may share the extreme value.
        intention_nodes = [k for k, v in criterion_dict.items() if v == m]
        logger.info('nodes: %s have degree: %d' % (str(intention_nodes), m))
        logger.info('final intention node is %d' % intention_nodes[0])
        self.add_intention_on_node(intention_nodes[0])

    def get_person_nodes(self, candidates):
        """Return the candidates whose node type is ``Person``.

        :param candidates: iterable of node ids
        :return: list of node ids
        """
        return [
            node for node in candidates
            if self.query_graph.nodes[node].get('type') == 'Person'
        ]

    def component_assemble(self):
        """For each node type, merge the first pair of same-typed nodes
        that are not yet connected (pairs ordered by ``get_outdiff``)."""
        for k, v in self.node_type_dict.items():
            if len(v) < 2:
                continue
            combinations = sorted(itertools.combinations(v, 2),
                                  key=self.query_graph.get_outdiff)
            for pair in combinations:
                test_graph = nx.to_undirected(self.query_graph)
                if nx.has_path(test_graph, pair[0], pair[1]):
                    # Already connected: leave this pair alone.
                    continue
                nx.relabel_nodes(self.query_graph, {pair[0]: pair[1]},
                                 copy=False)
                break

    def init_entity_component(self):
        """Create one graph component per recognised entity."""
        for e in self.entity:
            component = QueryGraphComponent(e)
            self.entity_component_list.append(nx.MultiDiGraph(component))

    def company_trick(self):
        """Drop one '的公司' relation when a legal-person, manager,
        employee or similar company relation with another value exists."""
        company_rels = ['LegalPerson', 'Employ', 'Manager', 'WorkFor']
        has_specific = any(
            rel['type'] in company_rels and rel['value'] != '的公司'
            for rel in self.relation)
        if not has_specific:
            return
        for r in self.relation:
            if r['value'] == '的公司':
                # Safe to mutate: the loop ends right after the removal.
                self.relation.remove(r)
                break

    def init_relation_component(self):
        """Create a two-node component for every known relation."""
        self.company_trick()
        for r in self.relation:
            if r['type'] not in RELATION_DATA.keys():
                continue
            relation_component = nx.MultiDiGraph()
            relation_component.add_edge('temp_0', 'temp_1', r['type'], **r)
            for n in relation_component.nodes:
                relation_component.nodes[n]['label'] = 'concept'
            relation_component.nodes['temp_0']['type'] = RELATION_DATA[
                r['type']]['domain']
            relation_component.nodes['temp_1']['type'] = RELATION_DATA[
                r['type']]['range']
            self.relation_component_list.append(relation_component)

    def pre_process(self):
        """Filter account relations made redundant by account entities.

        For example, when a QQ-number entity is present, the ``ChasQQ``
        and ``PhasQQ`` relations are removed from ``self.relation``.
        """
        account_dict = {
            'QQ_NUM': ['ChasQQ', 'PhasQQ'],
            'MOB_NUM': ['PhasMobileNum', 'ChasMobileNum'],
            'EMAIL_VALUE': ['PhasEmail'],
            'WECHAT_VALUE': ['PhasWeChat'],
            'ALIPAY_VALUE': ['PhasAlipay'],
            'DOUYIN_VALUE': ['PhasDouYin'],
            'JD_VALUE': ['PhasJD'],
            'TAOBAO_VALUE': ['PhasTaoBao'],
            'MICROBLOG_VALUE': ['PhasMicroBlog'],
            'VEHCARD_VALUE': ['PhasVehicleCard'],
            'IDCARD_VALUE': ['PhasIdcard']
        }
        blocked = set()
        for e in self.entity:
            blocked.update(account_dict.get(e['type'], ()))
        # Rebind only when an account entity was seen, as before.
        if blocked:
            self.relation = [
                x for x in self.relation if x['type'] not in blocked
            ]
class QueryInterface(object):
    """Convert a question-answering graph into the query interface."""

    def __init__(self, graph, query):
        """Run the whole conversion pipeline.

        :param graph: query graph; its nodes are relabelled to integers
        :param query: the original query, echoed in ``query_dict``
        """
        self.graph = nx.convert_node_labels_to_integers(graph)
        self.graph = Graph(self.graph)
        self.query = query
        # Marks the case where the query intention is a "belong" attribute.
        self.intention_tail = ''

        self.entities = dict()
        self.init_entities()

        self.rels = list()
        self.init_rels()

        self.intentions = list()
        self.init_intention()

        self.query_dict = dict()

        self.serial_process()
        self.add_rels_to_entities()
        self.final_delete()
        self.init_query_dict()

    def add_rels_to_entities(self):
        """Attach every relation id to both entities it connects."""
        for rel in self.rels:
            rel_id = rel['id']
            entity_1, entity_2 = rel['rel'].split('-')
            self.add_rel_to_entities(rel_id, entity_1)
            self.add_rel_to_entities(rel_id, entity_2)

    def add_rel_to_entities(self, rel_id, entity_id):
        """Attach one relation id to the first entity with ``entity_id``.

        :param rel_id: id of the relation
        :param entity_id: id of the entity to attach it to
        """
        for _, entities in self.entities.items():
            for e in entities:
                if e['id'] == entity_id:
                    if not e.get('rel'):
                        e['rel'] = list()
                    e['rel'].append(rel_id)
                    return

    def literal_node_reduction(self):
        """Fold each literal node into its predecessors' ``data`` dict.

        :return: a new graph without literal nodes
        """
        new_graph = deepcopy(self.graph)
        for node in self.graph.nodes:
            if self.graph.nodes[node]['label'] != 'literal':
                continue
            key = self.graph.nodes[node]['entity']['type']
            value = self.graph.nodes[node]['entity']['value']
            for p in self.graph.predecessors(node):
                data = new_graph.nodes[p].setdefault('data', dict())
                # Values are collected in a list per attribute type.
                data.setdefault(key, list()).append(value)
                new_graph.remove_edge(p, node)
            new_graph.remove_node(node)
        return new_graph

    def node_rename(self):
        """Rename integer concept nodes after their type.

        A concept node becomes ``<type lower-cased><node id>``; an empty
        intention node becomes ``get_complex(type).upper()`` instead.
        """
        map_dict = dict()
        for node in self.graph.nodes:
            if not isinstance(node, int):
                continue
            if self.graph.nodes[node]['label'] == 'concept':
                map_dict[node] = self.graph.nodes[node]['type'].lower(
                ) + '%d' % node
                if self.graph.nodes[node].get(
                        'intent') and self.graph.is_none_node(node):
                    map_dict[node] = get_complex(
                        self.graph.nodes[node]['type']).upper()
        self.graph = nx.relabel_nodes(self.graph, mapping=map_dict)

    def is_belong_property(self, edge):
        """Tell whether an edge corresponds to a "belong" attribute.

        :param edge: triple ``(n, m, k)`` meaning ``n -k-> m``
        :return: True when ``k`` is a belong relation whose domain and
            range match the node types
        """
        n, m, k = edge
        for key, value in RELATION_DATA.items():
            if k == key and value['belong']:
                dom = self.graph.nodes[n]['type']
                ran = self.graph.nodes[m]['type']
                if dom == value['domain'] and ran == value['range']:
                    return True
        return False

    def belong_reduction(self):
        """Reduce the graph along its belong-attribute edges.

        :return: a new, reduced graph
        """
        new_graph = deepcopy(self.graph)
        for edge in self.graph.edges:
            if not self.is_belong_property(edge):
                continue
            n1, n2, k = edge
            # Either end may already have been removed by an earlier step.
            if n1 not in new_graph.nodes or n2 not in new_graph.nodes:
                continue
            if n2.isupper():
                # The tail node is the query intention: keep it and
                # remember its type as the intention suffix.
                self.intention_tail = '.%s' % new_graph.nodes[n2].get('type')
                continue
            remain_belong_reduction(new_graph, n1, n2)
        return new_graph

    def graph_reduction(self):
        """Rename nodes, then fold literal nodes and belong edges."""
        self.node_rename()
        self.graph = self.literal_node_reduction()
        self.graph = self.belong_reduction()

    def init_entities(self):
        """Turn graph nodes into interface entities grouped by type."""
        self.graph_reduction()
        for node in self.graph.nodes:
            # Empty nodes get an entry as well.
            temp_dict = dict()
            node_type = self.graph.nodes[node]['type']
            if node_type not in self.entities.keys():
                self.entities[node_type] = list()
            temp_dict['id'] = node
            temp_data = self.graph.nodes[node].get('data')
            if temp_data:
                temp_dict.update(temp_data)
            self.entities[node_type].append(temp_dict)

    def init_rels(self):
        """Turn every non-belong edge into an interface relation."""
        for i, edge in enumerate(self.graph.edges):
            n, m, k = edge
            if self.is_belong_property((n, m, k)):
                continue
            rel_id = 'relation%d' % (i + 1)
            # Remember the id on the edge so init_intention can read it.
            self.graph.get_edge_data(n, m, k)['edge_id'] = rel_id
            temp_dict = dict()
            temp_dict['id'] = rel_id
            temp_dict['rel'] = '%s-%s' % (str(n), str(m))
            temp_dict['type'] = k
            temp_dict['value'] = self.graph.get_edge_data(n, m,
                                                          k).get('value')
            self.rels.append(temp_dict)

    def get_intent_node(self):
        """Return the first node flagged ``intent``, or None."""
        for node in self.graph.nodes:
            if self.graph.nodes[node].get('intent'):
                return node
        return None

    def isolated_node_process(self, shortest_path):
        """Record nodes off ``shortest_path`` on their predecessors.

        For each such node, every edge key from a predecessor is stored
        in that predecessor's ``data`` dict, mapped to the node.
        """
        for node in self.graph.nodes:
            if node in shortest_path:
                continue
            for p in self.graph.predecessors(node):
                if 'data' not in self.graph.nodes[p].keys():
                    self.graph.nodes[p]['data'] = dict()
                for k in self.graph.get_edge_data(p, node):
                    self.graph.nodes[p]['data'][k] = node

    def init_intention(self):
        """Build the intention string.

        It starts at the first node in topological order and lists the
        edge ids along the shortest path to the intention node.
        """
        top_sort = nx.topological_sort(self.graph)
        header = next(top_sort)
        current = header
        intention_str = str(header)
        intent_node = self.get_intent_node()
        shortest_path = nx.shortest_path(self.graph, header, intent_node)
        for node in shortest_path:
            if node == header:
                continue
            old = current
            current = node
            edge = self.graph.get_edge_data(old, current, default=None)
            # Check before reading the keys; there may be no such edge.
            if not edge:
                continue
            k = list(edge.keys())[0]
            if self.is_belong_property((old, current, k)):
                continue
            for v in edge.values():
                intention_str += '.%s' % str(v['edge_id'])
        intention_str += self.intention_tail
        self.intentions.append(intention_str)
        # Handle the nodes that are not on the shortest path.
        self.isolated_node_process(shortest_path)

    def init_query_dict(self):
        """Fill ``query_dict`` with the query and the derived data."""
        self.query_dict['query'] = self.query
        self.query_dict['intentions'] = self.intentions
        self.query_dict['entities'] = self.entities
        self.query_dict['rels'] = self.rels

    def get_query_data(self):
        """Return ``query_dict`` passed through ``dict_low_case``."""
        return dict_low_case(self.query_dict)

    def serial_process(self):
        """Renumber the query entities sequentially within each type.

        All-upper-case ids are left untouched. Every other entity gets
        ``<type lower-cased><position>``, counting from 1, or from the
        list index once an upper-case id was seen in the same type. All
        renames are collected first and applied in one pass, so a new id
        that equals another entity's old id is never renamed twice.
        """
        renames = dict()
        for e_type in self.entities:
            temp_type = e_type.lower()
            flag = False
            for n, e in enumerate(self.entities[e_type]):
                entity_id = e['id']
                if entity_id == entity_id.upper():
                    # An all-upper-case entity exists for this type.
                    flag = True
                    continue
                new_id = '%s%d' % (temp_type, n if flag else n + 1)
                renames[entity_id] = new_id
                e['id'] = new_id
        self._apply_renames(renames)

    def _apply_renames(self, renames):
        """Apply an old-id -> new-id mapping to intentions and rels.

        Ids are matched as whole tokens: intention strings are joined
        with '.', relation endpoints with '-'.
        """
        if not renames:
            return
        for index, intent_str in enumerate(self.intentions):
            self.intentions[index] = '.'.join(
                renames.get(token, token) for token in intent_str.split('.'))
        for relation in self.rels:
            relation['rel'] = '-'.join(
                renames.get(token, token)
                for token in relation['rel'].split('-'))

    def find_replace(self, entity_id, new_id):
        """Replace ``entity_id`` by ``new_id`` in intentions and rels.

        This is a plain substring replacement on the first intention and
        on every relation's ``'rel'`` string.

        :param entity_id: id to look for
        :param new_id: replacement id
        """
        intent_str = self.intentions[0]
        intent_str = intent_str.replace(entity_id, new_id)
        self.intentions[0] = intent_str
        for relation in self.rels:
            relation['rel'] = relation['rel'].replace(entity_id, new_id)

    def final_delete(self):
        """Delete all-upper-case entities that carry no relation.

        At most one entity is removed per type, and processing stops at
        the first type that becomes empty (that type is dropped).
        """
        for e_type in self.entities:
            for e in self.entities[e_type]:
                entity_id = e['id']
                if entity_id == entity_id.upper() and not e.get('rel'):
                    # Leave the loop right after mutating the list.
                    self.entities[e_type].remove(e)
                    break
            if len(self.entities[e_type]) == 0:
                # Leave the loop right after mutating the dict.
                self.entities.pop(e_type)
                break