def __find_paths(self, graph, entity_items, relation_items, edges, output_paths=None, used_edges=None):
    """Recursively grow candidate paths by consuming the remaining relation items.

    Base case: when ``relation_items`` is empty the search stops. An empty
    ``Paths`` is returned if entity items are still left over; otherwise the
    accumulated ``output_paths`` is returned as is.

    Otherwise, for every URI of every remaining relation item, each edge
    returned by ``self.find_edges(edges, relation, used_edges)`` is added to
    the paths through ``output_paths.extend(edge)`` and the search recurses
    with that edge removed from ``edges`` and the matched items removed from
    ``entity_items`` / ``relation_items``.

    Args:
        graph: Object exposing ``relation_items``; its length is the minimum
            path length used in the predicate handed to ``Paths.add``.
        entity_items: Entity items still to be covered (must support ``-``).
        relation_items: Relation items still to be covered (must support ``-``).
        edges: Candidate edges (must support ``edges - {edge}``).
        output_paths: Paths accumulated so far. A fresh empty ``Paths`` is
            used when omitted.
        used_edges: Edges already consumed on the current branch. A fresh
            empty set is used when omitted.

    Returns:
        A ``Paths`` collection with the results gathered from all branches,
        or the base-case value described above.
    """
    # Build the defaults per call. Default values are evaluated once at
    # definition time, so ``Paths()`` / ``set()`` defaults would be shared by
    # every call that omits them (and the shared Paths could be returned below).
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    new_output_paths = Paths([])
    if len(relation_items) == 0:
        if len(entity_items) > 0:
            return Paths()
        return output_paths

    # NOTE(review): this list is never reset, so relations tried in earlier
    # iterations stay excluded in later recursive calls -- confirm intended.
    used_relations = []
    for relation_item in relation_items:
        for relation in relation_item.uris:
            used_relations = used_relations + [relation]
            for edge in self.find_edges(edges, relation, used_edges):
                # Collect the node URIs of this edge, skipping nodes whose URIs
                # are all generic and skipping everything for "type" edges.
                entities = MyList()
                if not (edge.source_node.are_all_uris_generic() or edge.uri.is_type()):
                    entities.extend(edge.source_node.uris)
                if not (edge.dest_node.are_all_uris_generic() or edge.uri.is_type()):
                    entities.extend(edge.dest_node.uris)

                new_paths = self.__find_paths(
                    graph,
                    entity_items - LinkedItem.list_contains_uris(entity_items, entities),
                    relation_items - LinkedItem.list_contains_uris(relation_items, used_relations),
                    edges - {edge},
                    output_paths=output_paths.extend(edge),
                    used_edges=used_edges | {edge})
                new_output_paths.add(new_paths, lambda path: len(path) >= len(graph.relation_items))

    return new_output_paths
def __find_paths_start_with_entities(self, graph, entity_items, relation_items, edges, output_paths=None, used_edges=None):
    """Seed the path search from the edges attached to each entity URI.

    For every URI of every entity item, each edge returned by
    ``self.find_edges_by_entity(edges, entity, used_edges)`` becomes the first
    edge of a candidate path. The entity and relation items matched by that
    edge are removed from the pools, and ``self.__find_paths`` continues the
    search on what is left.

    Args:
        graph: Object exposing ``relation_items``; its length is the minimum
            path length used in the predicate handed to ``Paths.add``.
        entity_items: Entity items to start from (must support ``-``).
        relation_items: Relation items to be covered (must support ``-``).
        edges: Candidate edges (must support ``edges - {edge}``).
        output_paths: Paths accumulated so far. A fresh empty ``Paths`` is
            used when omitted.
        used_edges: Edges already consumed. A fresh empty set is used when
            omitted.

    Returns:
        A ``Paths`` collection with the results gathered from all start edges.
    """
    # Build the defaults per call. Default values are evaluated once at
    # definition time, so ``Paths()`` / ``set()`` defaults would be shared by
    # every call that omits them.
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    new_output_paths = Paths([])
    for entity_item in entity_items:
        for entity in entity_item.uris:
            for edge in self.find_edges_by_entity(edges, entity, used_edges):
                # A "type" edge is matched through its destination node's URIs
                # rather than through the edge URI itself.
                if not edge.uri.is_type():
                    used_relations = [edge.uri]
                else:
                    used_relations = edge.dest_node.uris

                # Collect the node URIs of this edge, skipping nodes whose URIs
                # are all generic and skipping everything for "type" edges.
                entities = MyList()
                if not (edge.source_node.are_all_uris_generic() or edge.uri.is_type()):
                    entities.extend(edge.source_node.uris)
                if not (edge.dest_node.are_all_uris_generic() or edge.uri.is_type()):
                    entities.extend(edge.dest_node.uris)

                entity_use = entity_items - LinkedItem.list_contains_uris(entity_items, entities)
                relation_use = relation_items - LinkedItem.list_contains_uris(relation_items, used_relations)
                edge_use = edges - {edge}

                new_paths = self.__find_paths(
                    graph,
                    entity_use,
                    relation_use,
                    edge_use,
                    output_paths=output_paths.extend(edge),
                    used_edges=used_edges | {edge})
                new_output_paths.add(new_paths, lambda path: len(path) >= len(graph.relation_items))

    return new_output_paths
def __find_paths_start_with_entities(self, graph, entity_items, relation_items, edges, output_paths=None, used_edges=None):
    """Seed the path search from the edges attached to each entity URI.

    For every entity item, that item is removed from the entity pool. Then,
    for every URI of the item, each edge returned by
    ``self.find_edges_by_entity(edges, entity, used_edges)`` becomes the first
    edge of a candidate path. For each relation item matched by that edge, the
    search continues through ``self.__find_paths`` with that single relation
    item removed.

    Args:
        graph: Object exposing ``relation_items``; its length is the minimum
            path length used in the predicate handed to ``Paths.add``.
        entity_items: Entity items to start from (must support ``-``).
        relation_items: Relation items to be covered (must support ``-``).
        edges: Candidate edges (must support ``edges - {edge}``).
        output_paths: Paths accumulated so far. A fresh empty ``Paths`` is
            used when omitted.
        used_edges: Edges already consumed. A fresh empty set is used when
            omitted.

    Returns:
        A ``Paths`` collection with the results gathered from all start edges.
    """
    # Build the defaults per call. Default values are evaluated once at
    # definition time, so ``Paths()`` / ``set()`` defaults would be shared by
    # every call that omits them.
    if output_paths is None:
        output_paths = Paths()
    if used_edges is None:
        used_edges = set()

    new_output_paths = Paths([])
    for entity_item in entity_items:
        # The whole starting entity item is consumed, whichever URI matches.
        available_entity_items = entity_items - [entity_item]
        for entity in entity_item.uris:
            for edge in self.find_edges_by_entity(edges, entity, used_edges):
                # A "type" edge is matched through its destination node's URIs
                # rather than through the edge URI itself.
                if not edge.uri.is_type():
                    used_relations = [edge.uri]
                else:
                    used_relations = edge.dest_node.uris

                unavailable_relations = LinkedItem.list_contains_uris(relation_items, used_relations)
                # One branch per matched relation item: only that single item
                # is removed from the pool for the recursive search.
                for unavailable_relation in unavailable_relations:
                    available_relations = relation_items - MyList([unavailable_relation])
                    new_paths = self.__find_paths(
                        graph,
                        available_entity_items,
                        available_relations,
                        edges - {edge},
                        output_paths=output_paths.extend(edge),
                        used_edges=used_edges | {edge})
                    new_output_paths.add(new_paths, lambda path: len(path) >= len(graph.relation_items))

    return new_output_paths
def query():
    """Handle a query-generation request and return the result as JSON.

    Reads ``question``, ``entities`` and ``relations`` from the JSON request
    body, turns the raw entity/relation dictionaries into ``LinkedItem``
    objects, determines the question type, generates candidate queries and
    returns them (after ``postprocess``) together with the question type text
    and its confidence.
    """

    def build_linked_items(raw_items):
        # One LinkedItem per raw item: its surface plus a Uri per candidate.
        linked = []
        for raw in raw_items:
            candidates = [
                Uri(candidate["uri"], DBpedia.parse_uri, candidate["confidence"])
                for candidate in raw["uris"]
            ]
            linked.append(LinkedItem(raw["surface"], candidates))
        return linked

    payload = request.json
    question = payload['question']
    raw_entities = payload['entities']
    raw_relations = payload['relations']

    entities = build_linked_items(raw_entities)
    relations = build_linked_items(raw_relations)

    question_type, type_confidence = get_question_type(question)
    # Type 2 is handled as a count query, type 1 as an ask query.
    count_query = bool(question_type == 2)
    ask_query = (not count_query) and bool(question_type == 1)

    generated_queries = []
    combination_mode = True
    if not combination_mode:
        generated_queries = generate_query(question, question_type, entities, relations,
                                           count_query, ask_query)
    else:
        combinations = create_entity_relations_combinations(entities, relations)
        for combination in combinations:
            # Only combinations seen before any query has been produced are
            # used; once the list is non-empty the remaining ones are skipped.
            if generated_queries:
                continue
            generated_queries.extend(
                generate_query(question, question_type, combination[0], combination[1],
                               count_query, ask_query))

    queries = postprocess(generated_queries, count_query, ask_query)
    return jsonify({
        "queries": queries,
        "type": get_question_type_text(question_type),
        "type_confidence": type_confidence
    })
def __parse(self, dataset, name, top):
    """Build ``LinkedItem`` objects from the ``dataset[name]`` entries.

    Each entry supplies a list of candidate URIs (``"uris"``) and a
    ``(start_index, length)`` pair (``"surface"``) that selects the surface
    text out of ``dataset["question"]``.

    Args:
        dataset: Mapping holding the ``"question"`` text and the entries under
            ``name``.
        name: Key of the entry list inside ``dataset``.
        top: Upper bound for the slice of candidate URIs kept per item.

    Returns:
        A list with one ``LinkedItem`` per entry, in input order.
    """
    linked_items = []
    for entry in dataset[name]:
        candidates = [
            Uri(candidate["uri"], self.parser, candidate["confidence"])
            for candidate in entry["uris"]
        ]
        offset, span = entry["surface"]
        text = dataset["question"][offset:offset + span]
        linked_items.append(LinkedItem(text, candidates[:top]))
    return linked_items
def do(self, qapair, force_gold=False, top=5):
    """Derive linked entities and relations from the URIs of a QA pair's SPARQL.

    For every URI in ``qapair.sparql.uris`` the question text is searched with
    ``find_mentions``; the span of the first mention (or an empty string when
    there is none) becomes the surface of a ``LinkedItem`` wrapping that URI.
    The item is appended to the entity list when ``is_entity()`` is true and
    to the relation list when ``is_ontology()`` is true.

    Args:
        qapair: Object exposing ``sparql.uris`` and ``question.text``.
        force_gold: Not used by this implementation.
        top: Not used by this implementation.

    Returns:
        A ``(entities, relations)`` tuple of ``LinkedItem`` lists.
    """
    entity_items, relation_items = [], []
    for uri in qapair.sparql.uris:
        text = qapair.question.text
        found = find_mentions(text, [uri])
        if len(found) > 0:
            first = found[0]
            span = text[first["start"]:first["end"]]
        else:
            span = ""

        item = LinkedItem(span, [uri])
        # The two checks are independent: a URI may land in both lists.
        if uri.is_entity():
            entity_items.append(item)
        if uri.is_ontology():
            relation_items.append(item)
    return entity_items, relation_items
o = Orchestrator(None, question_type_classifier, None, parser, True) raw_entities = [{ "surface": "", "uris": [{ "confidence": 1, "uri": "http://dbpedia.org/resource/Bill_Finger" }] }] entities = [] for item in raw_entities: uris = [ Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"] ] entities.append(LinkedItem(item["surface"], uris)) raw_relations = [{ "surface": "", "uris": [{ "confidence": 1, "uri": "http://dbpedia.org/ontology/creator" }] }, { "surface": "", "uris": [{ "confidence": 1, "uri": "http://dbpedia.org/ontology/ComicsCharacter" }]
def generate_query():
    """Generate SPARQL queries for the question described in the JSON request.

    Required JSON fields: ``question``, ``entities``, ``relations``.
    Optional JSON fields: ``force_count``, ``force_bool``, ``force_list``
    (default ``False``), ``h1_threshold`` and ``timeout`` (default
    ``9999999``), ``use_cache`` (default ``True``).

    Responses:
        201 with ``{'queries', 'type', 'type_confidence'}`` on success (also
            when the answer comes from the cache).
        400 when the request carries no JSON body.
        408 with ``{'error': ...}`` when a ``RuntimeError`` is raised
            (presumably by the ``timeout`` guard -- confirm).
        422 with ``{'error': ...}`` for any other exception, including an
            empty entity or relation list.
    """
    # Local import keeps this handler self-contained.
    import hashlib

    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    force_count_query = payload.get('force_count', False)
    force_bool_query = payload.get('force_bool', False)
    force_list_query = payload.get('force_list', False)
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload.get('h1_threshold', 9999999))
    timeout_threshold = int(payload.get('timeout', 9999999))
    use_cache = bool(payload.get('use_cache', True))

    # The cache is persisted to ``hash_file``, so its keys must be stable across
    # processes. The builtin hash() of str/bytes is salted per interpreter
    # run, which would make persisted entries unreachable after a restart;
    # a SHA-256 digest of the same key material is used instead.
    key_material = (str(question) + str(raw_entities) + str(raw_relations) +
                    str(h1_threshold) + str(force_count_query) +
                    str(force_bool_query) + str(force_list_query))
    hash_key = hashlib.sha256(key_material.encode('utf-8')).hexdigest()
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)

    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))

    relations = []
    for item in raw_relations:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        relations.append(LinkedItem(item["surface"], uris))

    try:
        if len(entities) == 0 or len(relations) == 0:
            raise Exception('Wrong number of input entity/relation!')

        with timeout(timeout_threshold):
            # A forced type is passed to the query builder; None leaves the
            # decision to the builder. Precedence: list, then bool, then count.
            question_type = None
            if force_list_query:
                question_type = 0
            elif force_bool_query:
                question_type = 1
            elif force_count_query:
                question_type = 2

            queries, question_type, type_confidence = queryBuilder.generate_query(
                question, entities, relations, h1_threshold, question_type)

            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True

            queries = [{
                "query": kb.sparql_query(item["where"],
                                         "?u_" + str(item["suggested_id"]),
                                         count_query, ask_query),
                "confidence": item["confidence"]
            } for item in queries]

            result = {
                'queries': queries,
                'type': question_type_str,
                'type_confidence': type_confidence
            }
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except RuntimeError as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422
def generate_query():
    """Generate SPARQL queries for the question described in the JSON request.

    Required JSON fields: ``question``, ``entities``, ``relations``.
    Optional JSON fields: ``h1_threshold`` and ``timeout`` (default
    ``9999999``), ``use_cache`` (default ``True``).

    Responses:
        201 with ``{'queries', 'type'}`` on success (also when the answer
            comes from the cache).
        400 when the request carries no JSON body.
        408 with an empty JSON object when query generation raises.
    """
    # Local import keeps this handler self-contained.
    import hashlib

    payload = flask.request.json
    if not payload:
        flask.abort(400)

    question = payload['question']
    raw_entities = payload['entities']
    raw_relations = payload['relations']
    h1_threshold = int(payload.get('h1_threshold', 9999999))
    timeout_threshold = int(payload.get('timeout', 9999999))
    use_cache = bool(payload.get('use_cache', True))

    # The cache is persisted to ``hash_file``, so its keys must be stable across
    # processes. The builtin hash() of str is salted per interpreter run,
    # which would make persisted entries unreachable after a restart; a
    # SHA-256 digest of the same key material is used instead.
    key_material = (str(question) + str(raw_entities) + str(raw_relations) +
                    str(h1_threshold))
    hash_key = hashlib.sha256(key_material.encode('utf-8')).hexdigest()
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201

    logger.info(question)

    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))

    relations = []
    for item in raw_relations:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        relations.append(LinkedItem(item["surface"], uris))

    try:
        with timeout(timeout_threshold):
            queries, question_type = queryBuilder.generate_query(
                question, entities, relations, h1_threshold)

            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True

            queries = [{
                "query": kb.sparql_query(item["where"],
                                         "?u_" + str(item["suggested_id"]),
                                         count_query, ask_query),
                "confidence": item["confidence"]
            } for item in queries]

            result = {'queries': queries, 'type': question_type_str}
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except Exception as expt:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate, and log the cause instead of dropping
        # it. The response is kept as it was for existing clients.
        logger.error(expt)
        return flask.jsonify({}), 408