Esempio n. 1
0
    def __init__(self, query_data, dependency=None):
        """
        Build the query graph for a parsed question and mark its intent node.

        :param query_data: dict that must contain ``'intent'``; the
            ``'relation'`` and ``'entity'`` lists are optional and are
            inserted into ``query_data`` as empty lists when missing.
        :param dependency: optional dependency-analysis result. When it is
            non-empty, passes ``DepMap.check_dep()`` and yields a weakly
            connected graph, that graph becomes the query graph directly.
        """
        logger.info('Query Parsing...')
        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())
        self.intent = query_data['intent']
        self.dependency = dependency
        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build the sub-graph components for the relations and the entities.
        self.init_relation_component()
        self.init_entity_component()

        # If dependency information exists, try to derive the graph from it.
        if self.dependency:
            logger.info('dependency exist.')
            # Use the ``dependency`` argument itself: looking it up again in
            # ``query_data`` raised KeyError when the caller passed it only
            # as the parameter.
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if dm.check_dep():
                if nx.algorithms.is_weakly_connected(dm.dep_graph):
                    # The dependency graph is usable as-is; skip assembling.
                    self.query_graph = dm.dep_graph
                    self.determine_intention()
                    return
                else:
                    logger.info('dependency wrong!')

        # Put every component into one (still disconnected) graph.
        self.component_graph = nx.disjoint_union_all(
            self.relation_component_list + self.entity_component_list)
        self.query_graph = copy.deepcopy(self.component_graph)
        self.query_graph = Graph(self.query_graph)
        self.old_query_graph = copy.deepcopy(self.component_graph)

        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            # An unchanged node count means the previous round merged nothing;
            # an already connected graph needs no further merging either.
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            # Still disconnected: bridge components with a default edge.
            flag = self.add_default_edge()
            if not flag:
                # Nothing could be added, i.e. no default edge fits.
                logger.info('default edge missing!')
                break
        # The graph is now as connected as it can get; determine the intent.
        self.determine_intention()
Esempio n. 2
0
    def __init__(self, graph, query):
        """
        Wrap ``graph`` and run the conversion steps in a fixed order.

        :param graph: query graph; its nodes are relabelled to integers and
            the result is wrapped in ``Graph``.
        :param query: stored unchanged on ``self.query``.
        """
        relabelled = nx.convert_node_labels_to_integers(graph)
        self.graph = Graph(relabelled)

        self.query = query
        # Used when the query intent is an attribute owned by some node.
        self.intention_tail = ''

        self.entities = {}
        self.init_entities()

        self.rels = []
        self.init_rels()

        self.intentions = []
        self.init_intention()

        self.query_dict = {}

        # Post-processing steps; they run strictly in this order.
        for step in (self.serial_process, self.add_rels_to_entities,
                     self.final_delete, self.init_query_dict):
            step()
Esempio n. 3
0
 def init_dep_graph(self):
     """
     Build ``self.dep_graph`` from the items in ``self.dependency``.

     Only entity->relation and relation->entity items produce a sub-graph;
     every other combination is ignored. The sub-graphs are appended to
     ``self.dep_graph_list``, merged with a disjoint union, renumbered by
     enumeration order and finally wrapped in ``Graph``.
     """
     for dep in self.dependency:
         head, tail = dep['from'], dep['to']
         if head['type'] == 'entity' and tail['type'] == 'relation':
             build = self.from_ent_to_rel
         elif head['type'] == 'relation' and tail['type'] == 'entity':
             build = self.from_rel_to_ent
         else:
             continue
         piece = build(head['value'], tail['value'])
         self.dep_graph_list.append(piece)

     self.dep_graph = nx.disjoint_union_all(self.dep_graph_list)
     renumber = {node: idx for idx, node in enumerate(self.dep_graph.nodes)}
     nx.relabel_nodes(self.dep_graph, renumber, copy=False)
     self.dep_graph = Graph(self.dep_graph)
Esempio n. 4
0
 def add_default_edge(self):
     """
     Try to join the first two connected components with a default edge.

     Every (type in component 0, type in component 1) pair is compared with
     each ``DEFAULT_EDGE`` entry in both directions; the first match adds
     one edge keyed by the entry's key.

     :return: True if an edge was added, False if no entry matched.
     """
     parts = self.query_graph.get_connected_components_subgraph()
     # presumably maps node type -> list of nodes; verify against Graph
     first_types = Graph(parts[0]).node_type_statistic()
     second_types = Graph(parts[1]).node_type_statistic()
     type_pairs = list(
         itertools.product(first_types.keys(), second_types.keys()))

     for edge_key, spec in DEFAULT_EDGE.items():
         for left, right in type_pairs:
             if left == spec['domain'] and right == spec['range']:
                 # Edge runs from component 0 to component 1.
                 source = first_types[spec['domain']][0]
                 target = second_types[spec['range']][0]
             elif right == spec['domain'] and left == spec['range']:
                 # Edge runs from component 1 to component 0.
                 source = second_types[spec['domain']][0]
                 target = first_types[spec['range']][0]
             else:
                 continue
             self.query_graph.add_edge(source, target, edge_key)
             return True
     return False
Esempio n. 5
0
 def add_default_edge_between_components(self, components_set, c1, c2):
     """
     Add a default edge between two connected components.

     :param components_set: indexable collection of component sub-graphs
     :param c1: index of the first component
     :param c2: index of the second component
     :return: True if a default edge was added, otherwise False
     """
     # presumably maps node type -> list of nodes; verify against Graph
     types_a = Graph(components_set[c1]).node_type_statistic()
     types_b = Graph(components_set[c2]).node_type_statistic()
     type_pairs = list(itertools.product(types_a.keys(), types_b.keys()))
     pick = 0  # always attach to the first node of the matching type

     for edge_key, spec in DEFAULT_EDGE.items():
         for kind_a, kind_b in type_pairs:
             if kind_a == spec['domain'] and kind_b == spec['range']:
                 # Edge runs from component c1 to component c2.
                 source = types_a[spec['domain']][pick]
                 target = types_b[spec['range']][pick]
             elif kind_b == spec['domain'] and kind_a == spec['range']:
                 # Edge runs from component c2 to component c1.
                 source = types_b[spec['domain']][pick]
                 target = types_a[spec['range']][pick]
             else:
                 continue
             self.query_graph.add_edge(source,
                                       target,
                                       edge_key,
                                       type=edge_key,
                                       value=spec['value'])
             return True
     return False
Esempio n. 6
0
class QueryParser:
    """
    Assemble a query graph from recognised entities and relations, connect
    it, and mark the node that represents the query intent.
    """

    def __init__(self, query_data, dependency=None):
        """
        Build the query graph for a parsed question and mark its intent node.

        :param query_data: dict that must contain ``'intent'``; the
            ``'relation'`` and ``'entity'`` lists are optional and are
            inserted into ``query_data`` as empty lists when missing.
        :param dependency: optional dependency-analysis result. When it is
            non-empty, passes ``DepMap.check_dep()`` and yields a weakly
            connected graph, that graph becomes the query graph directly.
        """
        logger.info('Query Parsing...')
        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())
        self.intent = query_data['intent']
        self.dependency = dependency
        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build the sub-graph components for the relations and the entities.
        self.init_relation_component()
        self.init_entity_component()

        # If dependency information exists, try to derive the graph from it.
        if self.dependency:
            logger.info('dependency exist.')
            # Use the ``dependency`` argument itself: looking it up again in
            # ``query_data`` raised KeyError when the caller passed it only
            # as the parameter.
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if dm.check_dep():
                if nx.algorithms.is_weakly_connected(dm.dep_graph):
                    # The dependency graph is usable as-is; skip assembling.
                    self.query_graph = dm.dep_graph
                    self.determine_intention()
                    return
                else:
                    logger.info('dependency wrong!')

        # Put every component into one (still disconnected) graph.
        self.component_graph = nx.disjoint_union_all(
            self.relation_component_list + self.entity_component_list)
        self.query_graph = copy.deepcopy(self.component_graph)
        self.query_graph = Graph(self.query_graph)
        self.old_query_graph = copy.deepcopy(self.component_graph)

        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            # An unchanged node count means the previous round merged nothing;
            # an already connected graph needs no further merging either.
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            # Still disconnected: bridge components with a default edge.
            flag = self.add_default_edge()
            if not flag:
                # Nothing could be added, i.e. no default edge fits.
                logger.info('default edge missing!')
                break
        # The graph is now as connected as it can get; determine the intent.
        self.determine_intention()

    def add_default_edge(self):
        """
        Try to join the first two connected components with a default edge.

        Every (type in component 0, type in component 1) pair is compared
        with each ``DEFAULT_EDGE`` entry in both directions; the first match
        adds one edge keyed by the entry's key.

        :return: True if an edge was added, False if no entry matched.
        """
        components_set = self.query_graph.get_connected_components_subgraph()
        d0 = Graph(components_set[0]).node_type_statistic()
        d1 = Graph(components_set[1]).node_type_statistic()
        candidates = list(itertools.product(d0.keys(), d1.keys()))
        for key, edge in DEFAULT_EDGE.items():
            for c in candidates:
                if c[0] == edge['domain'] and c[1] == edge['range']:
                    # Edge runs from component 0 to component 1.
                    node_0 = d0[edge['domain']][0]
                    node_1 = d1[edge['range']][0]
                elif c[1] == edge['domain'] and c[0] == edge['range']:
                    # Edge runs from component 1 to component 0.
                    node_0 = d1[edge['domain']][0]
                    node_1 = d0[edge['range']][0]
                else:
                    continue
                self.query_graph.add_edge(node_0, node_1, key)
                return True
        return False

    def determine_intention_by_type(self):
        """
        Mark the first concept node whose type equals ``self.intent``
        (case 1.2 of ``determine_intention``). Marks nothing if none matches.
        """
        for n in self.query_graph.nodes:
            if self.query_graph.nodes[n]['label'] == 'concept':
                node_type = self.query_graph.nodes[n]['type']
                if node_type == self.intent:
                    self.add_intention_on_node(n)
                    break

    def add_intention_on_node(self, node):
        """
        Flag ``node`` as the intent node.

        :param node: id of a node in ``self.query_graph``
        """
        self.query_graph.nodes[node]['intent'] = True

    def add_intention_on_nodes(self, node_list):
        """
        Flag one node of ``node_list`` as the intent, preferring a Person.

        :param node_list: candidate node ids; nothing happens when empty
        :return:
        """
        if not node_list:
            # Nothing to choose from; indexing [0] below would raise.
            return
        for node in node_list:
            # ``.get``: not every node is guaranteed to carry a 'type'.
            if self.query_graph.nodes[node].get('type') == 'Person':
                self.query_graph.nodes[node]['intent'] = True
                return
        # No Person node found: fall back to the first candidate.
        node = node_list[0]
        self.query_graph.nodes[node]['intent'] = True

    def is_none_node(self, node):
        """
        Tell whether ``node`` is a blank node: a concept node none of whose
        neighbours (as returned by ``query_graph.neighbors``) is a literal.

        :param node: id of a node in ``self.query_graph``
        :return: True for a blank concept node, otherwise False
        """
        current_graph = self.query_graph
        if current_graph.nodes[node]['label'] != 'concept':
            return False
        for n in current_graph.neighbors(node):
            # A literal neighbour means the node is described by a value.
            if current_graph.nodes[n]['label'] == 'literal':
                return False
        return True

    def get_none_nodes(self):
        """
        :return: list of all blank nodes (see ``is_none_node``), in the
            iteration order of the graph's nodes
        """
        return [
            node for node in self.query_graph.nodes
            if self.is_none_node(node)
        ]

    def determine_intention(self):
        """
        Determine the intent node:
        1. If an intent type was recognised, the first concept node of that
           type is the intent.
        2. Otherwise a blank (concept) node is taken as the intent:
        2.1 with several blank nodes, the first Person blank node is
            preferred, else the first blank node;
        2.2 without blank nodes, the first Person concept node;
        2.3 without a Person, the first concept node.
        :return:
        """
        if self.intent:
            self.determine_intention_by_type()
            return

        none_nodes = self.get_none_nodes()
        if len(none_nodes) > 0:
            self.add_intention_on_nodes(none_nodes)
            return

        # Cases 2.2 / 2.3 speak of concept nodes only, so literal nodes must
        # not be offered as candidates (they were before, which could mark a
        # literal as the intent or fail on its missing 'type').
        concept_nodes = [
            n for n in self.query_graph.nodes
            if self.query_graph.nodes[n].get('label') == 'concept'
        ]
        self.add_intention_on_nodes(concept_nodes)

    def component_assemble(self):
        """
        Run one merge round: for every node type with at least two nodes,
        merge the first pair of same-typed nodes that is not yet connected.

        Relies on ``self.node_type_dict`` being current. To be refined with
        dependency analysis later.
        """
        for v in self.node_type_dict.values():
            if len(v) < 2:
                continue
            for pair in itertools.combinations(v, 2):
                test_graph = nx.to_undirected(self.query_graph)
                if nx.has_path(test_graph, pair[0], pair[1]):
                    # Already connected: merging would be pointless.
                    continue
                # Relabel the first node onto the second, i.e. merge them.
                mapping = {pair[0]: pair[1]}
                nx.relabel_nodes(self.query_graph, mapping, copy=False)
                break

    def init_entity_component(self):
        """Create one ``MultiDiGraph`` component per recognised entity."""
        for e in self.entity:
            component = QueryGraphComponent(e)
            self.entity_component_list.append(nx.MultiDiGraph(component))

    def init_relation_component(self):
        """
        Create a two-node component for every relation whose type is known
        to ``RELATION_DATA``; relations of unknown type are skipped.
        """
        for r in self.relation:
            if r['type'] not in RELATION_DATA:
                continue
            relation_component = nx.MultiDiGraph()
            relation_component.add_edge('temp_0', 'temp_1', r['type'], **r)
            for n in relation_component.nodes:
                relation_component.nodes[n]['label'] = 'concept'

            # The end points take the relation's domain and range types.
            relation_component.nodes['temp_0']['type'] = RELATION_DATA[
                r['type']]['domain']
            relation_component.nodes['temp_1']['type'] = RELATION_DATA[
                r['type']]['range']
            self.relation_component_list.append(relation_component)
Esempio n. 7
0
    def __init__(self, query_data, dependency=None):
        """
        Build the query graph for a parsed question and mark its intent node.

        On failure ``self.error_info`` is set to a message and the
        constructor returns early; it stays ``None`` on the paths that reach
        ``determine_intention``.

        :param query_data: dict that must contain ``'intent'``; the
            ``'relation'`` and ``'entity'`` lists are optional and are
            inserted into ``query_data`` as empty lists when missing.
        :param dependency: optional dependency-analysis result handed to
            ``DepMap``.
        """
        logger.info('Query Graph Parsing...')
        self.error_info = None

        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())

        self.pre_process()

        self.intent = query_data['intent']
        self.dependency = dependency
        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build the sub-graph components for the relations and the entities.
        self.init_relation_component()
        self.init_entity_component()

        # If dependency information exists, derive a graph from it.
        if self.dependency and len(self.dependency) > 0:
            logger.info('dependency exist.')
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if dm.check_dep(
            ) and dm.dep_graph and nx.algorithms.is_weakly_connected(
                    dm.dep_graph):
                self.query_graph = dm.dep_graph

        # NOTE(review): this reset discards the dependency graph assigned
        # just above, so the graph is always rebuilt from the components.
        # Confirm whether disabling the dependency path is intentional.
        self.query_graph = None
        # Put every component into one (still disconnected) graph.
        # The order of the components decides the node merge order and has a
        # large effect on the final graph (entities first, then relations).
        self.component_graph = my_disjoint_union_all(
            self.entity_component_list + self.relation_component_list)
        self.query_graph = copy.deepcopy(self.component_graph)
        self.query_graph = Graph(self.query_graph)
        self.old_query_graph = copy.deepcopy(self.component_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            # An unchanged node count means the previous round merged nothing;
            # an already connected graph needs no further merging either.
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()
        if not self.query_graph:
            # Falsy graph: nothing to work with. The message reads
            # "the question lacks a required entity".
            self.error_info = '问句缺失必要实体'
            return
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            # Still disconnected: bridge components with a default edge.
            flag = self.add_default_edge()
            if not flag:
                # Nothing could be added, i.e. no default edge fits.
                logger.info('default edge missing!')
                logger.info('graph is not connected!')
                self.error_info = 'graph is not connected!'
                return

        # Both loops are done and the graph is connected; determine the
        # intent next.
        logger.info('connected graph is already')
        self.query_graph = nx.convert_node_labels_to_integers(self.query_graph)
        self.query_graph = Graph(self.query_graph)
        self.query_graph.show_log()
        logger.info('next is determine intention')
        self.determine_intention()
Esempio n. 8
0
class QueryParser(object):
    """
    Semantic parsing module for the dynamic question-answering graph.

    Builds a connected query graph from the recognised entities and
    relations and marks the node that represents the query intent. When
    parsing fails, ``error_info`` holds a message; otherwise it is ``None``.
    """
    def __init__(self, query_data, dependency=None):
        """
        Build the query graph for a parsed question and mark its intent node.

        :param query_data: dict that must contain ``'intent'``; the
            ``'relation'`` and ``'entity'`` lists are optional and are
            inserted into ``query_data`` as empty lists when missing.
        :param dependency: optional dependency-analysis result handed to
            ``DepMap``.
        """
        logger.info('Query Graph Parsing...')
        self.error_info = None

        self.relation = query_data.setdefault('relation', list())
        self.entity = query_data.setdefault('entity', list())

        self.pre_process()

        self.intent = query_data['intent']
        self.dependency = dependency
        self.relation_component_list = list()
        self.entity_component_list = list()
        # Build the sub-graph components for the relations and the entities.
        self.init_relation_component()
        self.init_entity_component()

        # If dependency information exists, derive a graph from it.
        if self.dependency and len(self.dependency) > 0:
            logger.info('dependency exist.')
            dm = DepMap(self.dependency, self.relation_component_list,
                        self.entity_component_list)
            if (dm.check_dep() and dm.dep_graph
                    and nx.algorithms.is_weakly_connected(dm.dep_graph)):
                self.query_graph = dm.dep_graph

        # NOTE(review): this reset discards the dependency graph assigned
        # just above, so the graph is always rebuilt from the components.
        # Behaviour is kept as found; confirm whether it is intentional.
        self.query_graph = None
        # Put every component into one (still disconnected) graph.
        # The order of the components decides the node merge order and has a
        # large effect on the final graph (entities first, then relations).
        self.component_graph = my_disjoint_union_all(
            self.entity_component_list + self.relation_component_list)
        self.query_graph = copy.deepcopy(self.component_graph)
        self.query_graph = Graph(self.query_graph)
        self.old_query_graph = copy.deepcopy(self.component_graph)
        self.node_type_dict = self.query_graph.node_type_statistic()
        self.component_assemble()

        while len(self.query_graph.nodes) != len(self.old_query_graph.nodes) \
                and not nx.algorithms.is_weakly_connected(self.query_graph):
            # An unchanged node count means the previous round merged nothing;
            # an already connected graph needs no further merging either.
            self.old_query_graph = copy.deepcopy(self.query_graph)
            self.node_type_dict = self.query_graph.node_type_statistic()
            self.component_assemble()
        if not self.query_graph:
            # Falsy graph: nothing to work with. The message reads
            # "the question lacks a required entity".
            self.error_info = '问句缺失必要实体'
            return
        while not nx.algorithms.is_weakly_connected(self.query_graph):
            # Still disconnected: bridge components with a default edge.
            flag = self.add_default_edge()
            if not flag:
                # Nothing could be added, i.e. no default edge fits.
                logger.info('default edge missing!')
                logger.info('graph is not connected!')
                self.error_info = 'graph is not connected!'
                return

        # Both loops are done and the graph is connected; determine the
        # intent next.
        logger.info('connected graph is already')
        self.query_graph = nx.convert_node_labels_to_integers(self.query_graph)
        self.query_graph = Graph(self.query_graph)
        self.query_graph.show_log()
        logger.info('next is determine intention')
        self.determine_intention()

    def add_default_edge_between_components(self, components_set, c1, c2):
        """
        Add a default edge between two connected components.

        :param components_set: indexable collection of component sub-graphs
        :param c1: index of the first component
        :param c2: index of the second component
        :return: True if a default edge was added, otherwise False
        """
        d0 = Graph(components_set[c1]).node_type_statistic()
        d1 = Graph(components_set[c2]).node_type_statistic()
        candidates = list(itertools.product(d0.keys(), d1.keys()))
        trick_index = 0  # always attach to the first node of the type
        for key, edge in DEFAULT_EDGE.items():
            for c in candidates:
                if c[0] == edge['domain'] and c[1] == edge['range']:
                    # Edge runs from component c1 to component c2.
                    node_0 = d0[edge['domain']][trick_index]
                    node_1 = d1[edge['range']][trick_index]
                elif c[1] == edge['domain'] and c[0] == edge['range']:
                    # Edge runs from component c2 to component c1.
                    node_0 = d1[edge['domain']][trick_index]
                    node_1 = d0[edge['range']][trick_index]
                else:
                    continue
                self.query_graph.add_edge(node_0,
                                          node_1,
                                          key,
                                          type=key,
                                          value=edge['value'])
                return True
        return False

    def add_default_edge(self):
        """
        Add one default edge between the first adjacent pair of connected
        components that admits one.

        :return: True if an edge was added, False if no adjacent pair fits
        """
        components_set = self.query_graph.get_connected_components_subgraph()
        # Stop one short of the end: each step looks at (i, i + 1). Running
        # to len() raised IndexError on the last step instead of reporting
        # "no default edge" to the caller.
        for i in range(len(components_set) - 1):
            if self.add_default_edge_between_components(
                    components_set, i, i + 1):
                return True
        return False

    def determine_intention_by_type(self):
        """
        Mark the first concept node whose type equals ``self.intent``.
        Marks nothing if no node matches.
        """
        for n in self.query_graph.nodes:
            if self.query_graph.nodes[n]['label'] == 'concept':
                node_type = self.query_graph.nodes[n]['type']
                if node_type == self.intent:
                    self.add_intention_on_node(n)
                    break

    def add_intention_on_node(self, node):
        """
        Flag ``node`` as the intent node.

        :param node: id of a node in ``self.query_graph``
        """
        self.query_graph.nodes[node]['intent'] = True

    def add_intention_on_nodes(self, node_list):
        """
        Flag one node of ``node_list`` as the intent, preferring a Person.

        :param node_list: candidate node ids; nothing happens when empty
        :return:
        """
        if not node_list:
            # Nothing to choose from; indexing [0] below would raise.
            return
        for node in node_list:
            # ``.get`` as in ``get_person_nodes``: a node may lack 'type'.
            if self.query_graph.nodes[node].get('type') == 'Person':
                self.query_graph.nodes[node]['intent'] = True
                return
        # No Person node found: fall back to the first candidate.
        node = node_list[0]
        self.query_graph.nodes[node]['intent'] = True

    def get_intention_candidate(self):
        """
        Collect the ids of the candidate intent nodes.

        Starts from all concept nodes, keeps only those of the recognised
        intent type (if any) and then prefers blank nodes among them. Sets
        ``self.error_info`` and returns ``None`` when no node has the
        intent type.

        :return: list of candidate node ids, or None on error
        """
        logger.info('all concept node as intention candidates')
        intention_candidates = self.query_graph.get_concept_nodes()
        logger.info('intention candidates is %s' % str(intention_candidates))

        if self.intent:
            # The intent recogniser supplied an intent type.
            logger.info('intention type is %s' % self.intent)
            intention_candidates = [
                x for x in intention_candidates
                if self.query_graph.nodes[x].get('type') == self.intent
            ]
            logger.info('intention candidates is %s' %
                        str(intention_candidates))

        if len(intention_candidates) == 0:
            logger.info('intention recognizer module produce wrong intention!')
            # The message reads "intent conflict!".
            self.error_info = '意图冲突!'
            return

        none_nodes = self.query_graph.get_none_nodes(self.intent)
        if len(none_nodes) > 0:
            logger.info('the graph has %d blank node: %s' %
                        (len(none_nodes), str(none_nodes)))
            blank_candidates = [
                x for x in intention_candidates if x in none_nodes
            ]
            # Narrow only when a candidate is actually blank; an empty
            # result used to crash ``determine_intention`` in ``min()``.
            if blank_candidates:
                intention_candidates = blank_candidates
            logger.info('intention candidates is %s' %
                        str(intention_candidates))
        return intention_candidates

    def determine_intention(self):
        """
        Determine the intent node:
        1. The intent recogniser derives an intent type from keywords.
        2. All concept nodes are candidates; if an intent type is known,
           candidates of a conflicting type are dropped.
        3. If some candidates are blank nodes (no literal describes them),
           only those are kept.
        4. Candidates are ranked by ``Graph.get_out_index`` and the first
           node with the minimum value becomes the intent.
        :return:
        """
        intention_candidates = self.get_intention_candidate()
        if self.error_info or not intention_candidates:
            return
        logger.info('determine intention by degree')
        criterion_dict = dict()
        for node in intention_candidates:
            criterion_dict[node] = self.query_graph.get_out_index(node)

        m = min(criterion_dict.values())
        # Several nodes may share the extreme value.
        intention_nodes = [k for k, v in criterion_dict.items() if v == m]
        logger.info('nodes: %s have degree: %d' % (str(intention_nodes), m))

        logger.info('final intention node is %d' % intention_nodes[0])
        self.add_intention_on_node(intention_nodes[0])

    def get_person_nodes(self, candidates):
        """
        Pick the nodes of type Person out of the candidates.

        :param candidates: iterable of node ids
        :return: list of the candidates whose 'type' is 'Person'
        """
        return [
            node for node in candidates
            if self.query_graph.nodes[node].get('type') == 'Person'
        ]

    def component_assemble(self):
        """
        Run one merge round: for every node type with at least two nodes,
        merge the first pair (ordered by ``Graph.get_outdiff``) of
        same-typed nodes that is not yet connected.

        Relies on ``self.node_type_dict`` being current. To be refined with
        dependency analysis later.
        """
        for v in self.node_type_dict.values():
            if len(v) < 2:
                continue
            combinations = sorted(itertools.combinations(v, 2),
                                  key=self.query_graph.get_outdiff)
            for pair in combinations:
                test_graph = nx.to_undirected(self.query_graph)
                if nx.has_path(test_graph, pair[0], pair[1]):
                    # Already connected: merging would be pointless.
                    continue
                # Relabel the first node onto the second, i.e. merge them.
                mapping = {pair[0]: pair[1]}
                nx.relabel_nodes(self.query_graph, mapping, copy=False)
                break

    def init_entity_component(self):
        """Create one ``MultiDiGraph`` component per recognised entity."""
        for e in self.entity:
            component = QueryGraphComponent(e)
            self.entity_component_list.append(nx.MultiDiGraph(component))

    def company_trick(self):
        """
        Drop one '的公司' ("...'s company") relation when a more specific
        company relation (legal person, manager, employee, ...) is present.

        Removes at most the first relation whose value is '的公司', in place.
        :return:
        """
        company_rels = ['LegalPerson', 'Employ', 'Manager', 'WorkFor']
        has_specific_relation = any(
            rel['type'] in company_rels and rel['value'] != '的公司'
            for rel in self.relation)
        if not has_specific_relation:
            return
        for r in self.relation:
            if r['value'] == '的公司':
                # Safe although we are iterating: we leave the loop at once.
                self.relation.remove(r)
                break

    def init_relation_component(self):
        """
        Create a two-node component for every relation whose type is known
        to ``RELATION_DATA``; relations of unknown type are skipped.
        ``company_trick`` is applied to the relation list first.
        """
        self.company_trick()
        for r in self.relation:
            if r['type'] not in RELATION_DATA:
                continue
            relation_component = nx.MultiDiGraph()
            relation_component.add_edge('temp_0', 'temp_1', r['type'], **r)
            for n in relation_component.nodes:
                relation_component.nodes[n]['label'] = 'concept'

            # The end points take the relation's domain and range types.
            relation_component.nodes['temp_0']['type'] = RELATION_DATA[
                r['type']]['domain']
            relation_component.nodes['temp_1']['type'] = RELATION_DATA[
                r['type']]['range']
            self.relation_component_list.append(relation_component)

    def pre_process(self):
        """
        Filter account relations: when an account entity (e.g. a QQ number)
        was recognised, the matching "has account" relations (e.g. ChasQQ)
        are removed from ``self.relation``.

        ``self.relation`` is rebound to a new list; the original list is
        not modified.
        :return:
        """
        # Entity type -> relation types made redundant by that entity.
        account_dict = {
            'QQ_NUM': ['ChasQQ', 'PhasQQ'],
            'MOB_NUM': ['PhasMobileNum', 'ChasMobileNum'],
            'EMAIL_VALUE': ['PhasEmail'],
            'WECHAT_VALUE': ['PhasWeChat'],
            'ALIPAY_VALUE': ['PhasAlipay'],
            'DOUYIN_VALUE': ['PhasDouYin'],
            'JD_VALUE': ['PhasJD'],
            'TAOBAO_VALUE': ['PhasTaoBao'],
            'MICROBLOG_VALUE': ['PhasMicroBlog'],
            'VEHCARD_VALUE': ['PhasVehicleCard'],
            'IDCARD_VALUE': ['PhasIdcard']
        }
        for e in self.entity:
            if e['type'] in account_dict:
                redundant = account_dict[e['type']]
                self.relation = [
                    x for x in self.relation if x['type'] not in redundant
                ]
Esempio n. 9
0
class QueryInterface(object):
    """
    实现从问答图到查询接口的转化
    """
    def __init__(self, graph, query):
        """
        Build the query-interface payload for an answer graph.

        :param graph: graph passed through
            ``nx.convert_node_labels_to_integers`` and then wrapped in ``Graph``
        :param query: stored unchanged and emitted under the ``'query'`` key
        """
        self.graph = Graph(nx.convert_node_labels_to_integers(graph))
        self.query = query

        # Suffix of the intention string; belong_reduction() fills it in when
        # the query intent is itself a belong property.
        self.intention_tail = ''

        # Result containers populated by the pipeline below.
        self.entities = {}
        self.rels = []
        self.intentions = []
        self.query_dict = {}

        # The steps depend on each other's results, so the order is fixed.
        self.init_entities()
        self.init_rels()
        self.init_intention()
        self.serial_process()
        self.add_rels_to_entities()
        self.final_delete()
        self.init_query_dict()

    def add_rels_to_entities(self):
        """
        将关系挂到相关实体上
        :return:
        """
        for rel in self.rels:
            rel_id = rel['id']
            entity_1, entity_2 = rel['rel'].split('-')
            self.add_rel_to_entities(rel_id, entity_1)
            self.add_rel_to_entities(rel_id, entity_2)

    def add_rel_to_entities(self, rel_id, entity_id):
        """
        将一个关系的id挂到相关实体上
        :param rel_id:
        :param entity_id:
        :return:
        """
        for _, entities in self.entities.items():
            for e in entities:
                if e['id'] == entity_id:
                    if not e.get('rel'):
                        e['rel'] = list()
                    e['rel'].append(rel_id)
                    return

    def literal_node_reduction(self):
        """
        将图上所有字面值节点规约为与其直接相连的对象节点的一个属性值
        :return: new graph
        """
        new_graph = deepcopy(self.graph)
        for node in self.graph.nodes:
            if self.graph.nodes[node]['label'] == 'literal':
                key = self.graph.nodes[node]['entity']['type']
                value = self.graph.nodes[node]['entity']['value']
                temp_dict = dict()
                temp_dict[key] = value
                for p in self.graph.predecessors(node):
                    if 'data' not in new_graph.nodes[p].keys():
                        new_graph.nodes[p]['data'] = dict()
                    if key not in new_graph.nodes[p]['data'].keys():
                        new_graph.nodes[p]['data'][key] = list()
                    """
                    new_graph.nodes[p]['data'].update(temp_dict)
                    """
                    new_graph.nodes[p]['data'][key].append(value)
                    new_graph.remove_edge(p, node)
                    new_graph.remove_node(node)
        return new_graph

    def node_rename(self):
        """
        Rename integer-labelled nodes according to their type information.

        A ``'concept'`` node becomes ``<type in lower case><node number>``. A
        node that carries an ``'intent'`` attribute and for which
        ``self.graph.is_none_node`` is true becomes
        ``get_complex(<type>).upper()``; this second rule wins when both apply.
        Nodes whose label is not an ``int`` are left alone.

        :return: None; ``self.graph`` is replaced by the relabelled graph
        """
        # old node label -> new node label
        renamed = {}
        for node in self.graph.nodes:
            if not isinstance(node, int):
                continue
            attrs = self.graph.nodes[node]
            if attrs['label'] == 'concept':
                renamed[node] = attrs['type'].lower() + '%d' % node
            if attrs.get('intent') and self.graph.is_none_node(node):
                renamed[node] = get_complex(attrs['type']).upper()
        self.graph = nx.relabel_nodes(self.graph, mapping=renamed)

    def is_belong_property(self, edge):
        """
        Tell whether an edge corresponds to a belong property.

        :param edge: ``(n, m, k)`` triple describing the edge ``n -k-> m``
        :return: True when ``RELATION_DATA[k]`` is flagged ``'belong'`` and its
            ``'domain'`` / ``'range'`` equal the types of ``n`` / ``m``;
            False otherwise
        """
        source, target, rel_type = edge
        if rel_type not in RELATION_DATA:
            return False
        spec = RELATION_DATA[rel_type]
        if not spec['belong']:
            return False
        source_type = self.graph.nodes[source]['type']
        target_type = self.graph.nodes[target]['type']
        return source_type == spec['domain'] and target_type == spec['range']

    def belong_reduction(self):
        """
        按边是否属于归属属性,对图进行规约
        :return:
        """
        new_graph = deepcopy(self.graph)
        for edge in self.graph.edges:
            if self.is_belong_property(edge):
                # 如果这条边对应归属属性
                # 将后一个节点的信息添加到前一个节点
                # 还要将后一个点的所有边添加到前一个点
                # 删除后一个节点
                n1, n2, k = edge
                if n1 not in new_graph.nodes or n2 not in new_graph.nodes:
                    continue
                if n2.isupper():
                    # 说明后一个节点为查询意图,不再规约
                    self.intention_tail = '.%s' % new_graph.nodes[n2].get(
                        'type')
                    # new_graph.remove_node(n2)
                    continue
                remain_belong_reduction(new_graph, n1, n2)
        return new_graph

    def graph_reduction(self):
        """
        对图结构进行规约
        :return:
        """
        self.node_rename()
        self.graph = self.literal_node_reduction()
        self.graph = self.belong_reduction()

    def init_entities(self):
        """
        将图中的实体进行处理,得到接口中对应的实体
        :return:
        """
        self.graph_reduction()
        for node in self.graph.nodes:
            # 空节点也占一个
            temp_dict = dict()
            node_type = self.graph.nodes[node]['type']
            if node_type not in self.entities.keys():
                self.entities[node_type] = list()
            temp_dict['id'] = node
            temp_data = self.graph.nodes[node].get('data')
            if temp_data:
                temp_dict.update(self.graph.nodes[node].get('data'))
            self.entities[node_type].append(temp_dict)

    def init_rels(self):
        for i, edge in enumerate(self.graph.edges):
            n, m, k = edge
            if self.is_belong_property((n, m, k)):
                continue
            edge_id = i + 1
            temp_dict = dict()

            temp_dict['id'] = 'relation%d' % edge_id
            self.graph.get_edge_data(n, m,
                                     k)['edge_id'] = 'relation%d' % edge_id
            temp_dict['rel'] = '%s-%s' % (str(n), str(m))
            temp_dict['type'] = k
            temp_dict['value'] = self.graph.get_edge_data(n, m, k).get('value')
            self.rels.append(temp_dict)

    def get_intent_node(self):
        for node in self.graph.nodes:
            if self.graph.nodes[node].get('intent'):
                return node

    def isolated_node_process(self, shortest_path):
        for node in self.graph.nodes:
            if node not in shortest_path:
                for p in self.graph.predecessors(node):
                    if 'data' not in self.graph.nodes[p].keys():
                        self.graph.nodes[p]['data'] = dict()
                    for k, v in self.graph.get_edge_data(p, node).items():
                        temp_dict = {k: node}
                        self.graph.nodes[p]['data'].update(temp_dict)

    def init_intention(self):
        """
        Build the intention string and append it to ``self.intentions``.

        The string starts with the first node in topological order. It then
        lists, each prefixed with ``'.'``, the ``edge_id`` of every edge walked
        along the shortest path from that node to the intent node, and ends
        with ``self.intention_tail``. A step whose first edge key is a belong
        property contributes nothing. Nodes off the path are afterwards handed
        to ``isolated_node_process``.

        :return: None
        """
        header = next(nx.topological_sort(self.graph))

        # NOTE(review): get_intent_node() returns None when no node is flagged
        # as intent; the call below is then made with a None target. Confirm
        # that callers always provide an intent node.
        intent_node = self.get_intent_node()
        shortest_path = nx.shortest_path(self.graph, header, intent_node)

        pieces = [str(header)]
        current = header
        for node in shortest_path:
            if node == header:
                continue
            old, current = current, node
            edge = self.graph.get_edge_data(old, current, default=None)
            if not edge:
                # Check for missing edge data before touching it, so a missing
                # edge is skipped instead of raising AttributeError.
                continue
            first_key = next(iter(edge))
            if self.is_belong_property((old, current, first_key)):
                continue
            pieces.extend(
                '.%s' % str(attrs['edge_id']) for attrs in edge.values())
        pieces.append(self.intention_tail)
        self.intentions.append(''.join(pieces))

        # Handle the nodes that do not lie on the shortest path.
        self.isolated_node_process(shortest_path)

    def init_query_dict(self):
        self.query_dict['query'] = self.query
        self.query_dict['intentions'] = self.intentions
        self.query_dict['entities'] = self.entities
        self.query_dict['rels'] = self.rels

    def get_query_data(self):
        """
        Return ``self.query_dict`` after passing it through ``dict_low_case``.

        :return: whatever ``dict_low_case`` produces for the query dict
        """
        return dict_low_case(self.query_dict)

    def serial_process(self):
        """
        对查询实体重新按顺序编号
        :return:
        """
        for e_type in self.entities:
            temp_type = e_type.lower()
            flag = False
            for n, e in enumerate(self.entities[e_type]):
                entity_id = e['id']
                if entity_id == entity_id.upper():
                    # 存在一个全大写的实体
                    flag = True
                    continue
                if flag:
                    new_id = '%s%d' % (temp_type, n)
                else:
                    new_id = '%s%d' % (temp_type, n + 1)

                self.find_replace(entity_id, new_id)
                e['id'] = new_id

    def find_replace(self, entity_id, new_id):
        """
        在intention和rels查找entity_id替换为new_id
        :param entity_id:
        :param new_id:
        :return:
        """
        intent_str = self.intentions[0]
        intent_str = intent_str.replace(entity_id, new_id)
        self.intentions[0] = intent_str
        for relation in self.rels:
            relation['rel'] = relation['rel'].replace(entity_id, new_id)

    def final_delete(self):
        """
        针对测试需求,删除全大写的实体
        :return:
        """
        for e_type in self.entities:
            for e in self.entities[e_type]:
                entity_id = e['id']
                if entity_id == entity_id.upper() and not e.get('rel'):
                    self.entities[e_type].remove(e)
                    break
            if len(self.entities[e_type]) == 0:
                self.entities.pop(e_type)
                break