def __find_paths_start_with_entities(self, graph, entity_items, relation_items, edges,
                                     output_paths=None, used_edges=None):
    """Seed path search from entity edges, consuming one relation item per seed.

    For every URI of every item in ``entity_items``, each edge returned by
    ``self.find_edges_by_entity(edges, uri, used_edges)`` is used as the
    first step of a path.  For each relation item that matches the edge
    (per ``LinkedItem.list_contains_uris``), that single item is removed
    from ``relation_items`` and the search continues in ``__find_paths``.

    NOTE(review): another method with this exact name is defined right after
    this one in the file; if both live in the same class body the later
    definition replaces this one -- confirm which variant is intended.

    Args:
        graph: object exposing ``relation_items``; forwarded to
            ``__find_paths``.
        entity_items: collection of items exposing ``.uris``; must support
            ``-`` with a list.
        relation_items: collection supporting ``-`` with a ``MyList``.
        edges: collection of candidate edges supporting ``- {edge}``.
        output_paths: paths built so far.  Defaults to a fresh ``Paths()``
            created per call (the previous default was a single instance
            created once at definition time and shared by every call).
        used_edges: set of edges already on the current path.  Defaults to a
            fresh empty set per call.

    Returns:
        A ``Paths`` object to which the result of every recursive search was
        added together with the predicate
        ``len(path) >= len(graph.relation_items)``.
    """
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    new_output_paths = Paths([])
    for entity_item in entity_items:
        # The whole entity item is considered handled once one of its URIs
        # seeds a path.
        available_entity_items = entity_items - [entity_item]
        for entity in entity_item.uris:
            for edge in self.find_edges_by_entity(edges, entity, used_edges):
                # For a "type" edge the relation URIs are read from the
                # destination node rather than from the edge itself.
                if not edge.uri.is_type():
                    used_relations = [edge.uri]
                else:
                    used_relations = edge.dest_node.uris

                unavailable_relations = LinkedItem.list_contains_uris(
                    relation_items, used_relations)
                for unavailable_relation in unavailable_relations:
                    available_relations = relation_items - MyList([unavailable_relation])
                    new_paths = self.__find_paths(
                        graph,
                        available_entity_items,
                        available_relations,
                        edges - {edge},
                        output_paths=output_paths.extend(edge),
                        used_edges=used_edges | {edge})
                    new_output_paths.add(
                        new_paths,
                        lambda path: len(path) >= len(graph.relation_items))
    return new_output_paths
def __find_paths_start_with_entities(self, graph, entity_items, relation_items, edges,
                                     output_paths=None, used_edges=None):
    """Seed path search with every edge that touches one of the entity URIs.

    For every URI of every item in ``entity_items``, each edge returned by
    ``self.find_edges_by_entity(edges, uri, used_edges)`` is used as the
    first step of a path.  The entity items and relation items matched by
    that edge (per ``LinkedItem.list_contains_uris``) are subtracted from the
    remaining work, and the search continues in ``__find_paths``.

    Args:
        graph: object exposing ``relation_items``; forwarded to
            ``__find_paths``.
        entity_items: collection of items exposing ``.uris``; must support
            ``-``.
        relation_items: collection supporting ``-``.
        edges: collection of candidate edges supporting ``- {edge}``.
        output_paths: paths built so far.  Defaults to a fresh ``Paths()``
            created per call (the previous default was a single instance
            created once at definition time and shared by every call).
        used_edges: set of edges already on the current path.  Defaults to a
            fresh empty set per call.

    Returns:
        A ``Paths`` object to which the result of every recursive search was
        added together with the predicate
        ``len(path) >= len(graph.relation_items)``.
    """
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    new_output_paths = Paths([])
    for entity_item in entity_items:
        for entity in entity_item.uris:
            for edge in self.find_edges_by_entity(edges, entity, used_edges):
                is_type_edge = edge.uri.is_type()

                # For a "type" edge the relation URIs are read from the
                # destination node rather than from the edge itself.
                if is_type_edge:
                    used_relations = edge.dest_node.uris
                else:
                    used_relations = [edge.uri]

                # Collect the URIs of non-generic endpoints; a "type" edge
                # contributes no entities at all.
                entities = MyList()
                if not is_type_edge:
                    for node in (edge.source_node, edge.dest_node):
                        if not node.are_all_uris_generic():
                            entities.extend(node.uris)

                entity_use = entity_items - LinkedItem.list_contains_uris(
                    entity_items, entities)
                relation_use = relation_items - LinkedItem.list_contains_uris(
                    relation_items, used_relations)
                edge_use = edges - {edge}

                new_paths = self.__find_paths(
                    graph,
                    entity_use,
                    relation_use,
                    edge_use,
                    output_paths=output_paths.extend(edge),
                    used_edges=used_edges | {edge})
                new_output_paths.add(
                    new_paths,
                    lambda path: len(path) >= len(graph.relation_items))
    return new_output_paths
def __find_paths(self, graph, entity_items, relation_items, edges,
                 output_paths=None, used_edges=None):
    """Recursively extend ``output_paths`` until no relation item is left.

    Base case: when ``relation_items`` is empty the search ends.  If entity
    items are still left over, an empty ``Paths()`` is returned; otherwise
    ``output_paths`` is returned as the result.

    Recursive case: for every relation URI, each edge returned by
    ``self.find_edges(edges, relation, used_edges)`` is appended to the
    current paths, the entity/relation items matched by it are subtracted,
    and the search recurses on what remains.

    Args:
        graph: object exposing ``relation_items``; used for the length
            predicate handed to ``Paths.add``.
        entity_items: remaining entity items; must support ``len`` and ``-``.
        relation_items: remaining relation items exposing ``.uris``; must
            support ``len`` and ``-``.
        edges: remaining candidate edges supporting ``- {edge}``.
        output_paths: paths built so far.  Defaults to a fresh ``Paths()``
            created per call; the previous default was one instance created
            at definition time, which the base case could hand back to
            callers and thereby share between unrelated calls.
        used_edges: set of edges already on the current path.  Defaults to a
            fresh empty set per call.

    Returns:
        ``output_paths`` or an empty ``Paths()`` in the base case, otherwise
        a ``Paths`` object to which every recursive result was added with the
        predicate ``len(path) >= len(graph.relation_items)``.
    """
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    if len(relation_items) == 0:
        if len(entity_items) > 0:
            # Every relation is consumed but some entity is still uncovered.
            return Paths()
        return output_paths

    new_output_paths = Paths([])
    # Grows across *all* relation items of this level, so relations tried in
    # earlier iterations are also subtracted in the deeper calls.
    used_relations = []
    for relation_item in relation_items:
        for relation in relation_item.uris:
            used_relations = used_relations + [relation]
            for edge in self.find_edges(edges, relation, used_edges):
                # Collect the URIs of non-generic endpoints; a "type" edge
                # contributes no entities at all.
                entities = MyList()
                if not edge.uri.is_type():
                    for node in (edge.source_node, edge.dest_node):
                        if not node.are_all_uris_generic():
                            entities.extend(node.uris)

                remaining_entities = entity_items - LinkedItem.list_contains_uris(
                    entity_items, entities)
                remaining_relations = relation_items - LinkedItem.list_contains_uris(
                    relation_items, used_relations)

                new_paths = self.__find_paths(
                    graph,
                    remaining_entities,
                    remaining_relations,
                    edges - {edge},
                    output_paths=output_paths.extend(edge),
                    used_edges=used_edges | {edge})
                new_output_paths.add(
                    new_paths,
                    lambda path: len(path) >= len(graph.relation_items))
    return new_output_paths
def to_where_statement(self, graph, parse_queryresult, ask_query, count_query, sort_query):
    """Turn a query graph into WHERE-clause candidates that have answers.

    Steps, in order:
      1. Call ``graph.generalize_nodes()`` and ``graph.merge_edges()``.
      2. Build candidate paths: one single-edge path per graph edge when
         ``ask_query`` is truthy, otherwise the result of
         ``__find_paths_start_with_entities``.
      3. Add variants of each path in which a generic endpoint is replaced
         by another generic node that occurs in the same path.
      4. De-duplicate, sort by ``confidence`` (highest first) and convert
         with ``paths.to_where(graph.kb, ask_query)``.
      5. Run each candidate through ``graph.kb.query_where`` and keep the
         ones whose ``AnswerSet`` has rows (all of them for ask queries).

    Args:
        graph: query graph exposing ``edges``, ``entity_items``,
            ``relation_items`` and ``kb``.
        parse_queryresult: forwarded to ``AnswerSet``.
        ask_query: truthy for boolean queries.
        count_query: forwarded to ``query_where`` as ``count``.
        sort_query: accepted but not used by this method.

    Returns:
        A list of the items produced by ``to_where`` that were kept, each
        with the additional keys ``"target_var"`` and ``"answer"``.
    """
    graph.generalize_nodes()
    graph.merge_edges()

    if not ask_query:
        candidates = self.__find_paths_start_with_entities(
            graph, graph.entity_items, graph.relation_items, graph.edges)
    else:
        candidates = Paths([Path([single_edge]) for single_edge in graph.edges])
    candidates = candidates.remove_duplicates()

    # Expand coverage by changing generic ids
    variants = []
    for candidate in candidates:
        generic_nodes = set()
        # Entries are (keyword for edge.copy, generic endpoint, edge) for
        # edges that have exactly one generic endpoint.
        swappable = []
        for step in candidate:
            src, dst = step.source_node, step.dest_node
            src_generic = src.are_all_uris_generic()
            dst_generic = dst.are_all_uris_generic()
            if src_generic:
                generic_nodes.add(src)
            if dst_generic:
                generic_nodes.add(dst)
            if src_generic and not dst_generic:
                swappable.append(("source_node", src, step))
            elif dst_generic and not src_generic:
                swappable.append(("dest_node", dst, step))

        for substitute in generic_nodes:
            for keyword, current_node, step in swappable:
                if current_node != substitute:
                    variant = candidate.replace_edge(
                        step, step.copy(**{keyword: substitute}))
                    if variant is not None:
                        variants.append(variant)
    variants = Paths(variants).remove_duplicates()

    # Appended one at a time: elsewhere in this file ``Paths.extend`` is
    # called with a single edge, so it is not a bulk-append here.
    for variant in variants:
        candidates.append(variant)
    candidates = candidates.remove_duplicates()
    candidates.sort(key=lambda p: p.confidence, reverse=True)
    queries = candidates.to_where(graph.kb, ask_query)

    # Remove queries with no answer
    answered = []
    for query in queries:
        target_var = "?u_" + str(query["suggested_id"])
        raw_answer = graph.kb.query_where(
            query["where"],
            return_vars=target_var,
            count=count_query,
            ask=ask_query)
        answerset = AnswerSet(raw_answer, parse_queryresult)
        # An empty answer drops the query, except for boolean queries.
        if len(answerset.answer_rows) == 0 and not ask_query:
            continue
        query["target_var"] = target_var
        query["answer"] = answerset
        answered.append(query)
    return answered