def _3_0_1_to_graphs(candidate_pathes, s1, t1):
    """Build '3_0_1' candidate graph queries of shape e-{p1}->a*-{p2}->literal.

    Every candidate is a tab-separated string with exactly four fields:
    ``p1 <TAB> answer_entity <TAB> p2 <TAB> literal``, for example
    ``location.location.contains <TAB> m.05vd5 <TAB> location.location.area <TAB> 47.0``.
    Candidates with a different number of fields are ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: id used for the topic entity node
    :param t1: node type used for the topic entity node
    :return: list with one dict per distinct (p1, p2) pair; each dict has the
        keys "querytype", "nodes", "edges", "path" and "denotation", where
        "denotation" lists the (answer_entity, literal) pairs seen for that pair
    """
    # The same three node objects are shared by every returned graph query.
    topic = GroundedNode(nid=1, node_type=t1, id=s1, type_class='',
                         friendly_name="", question_node=0)
    answer = GroundedNode(nid=2, node_type="class", id='?a', type_class='',
                          friendly_name="", question_node=1)
    literal = GroundedNode(nid=3, node_type="literal", id='?literal',
                           type_class='', friendly_name="", question_node=0)

    # (p1, p2) -> set of (answer_entity, literal) pairs
    answers_by_relations = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 4:
            rel_1, answer_id, rel_2, literal_value = fields
            answers_by_relations[(rel_1, rel_2)].add((answer_id, literal_value))

    graphqueries = []
    for (rel_1, rel_2), pairs in answers_by_relations.items():
        graphqueries.append({
            "querytype": '3_0_1',
            "nodes": [topic, answer, literal],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=rel_1),
                GroundedEdge(start=answer.nid, end=literal.nid, relation=rel_2),
            ],
            "path": '\t'.join((rel_1, rel_2)),
            "denotation": list(pairs),
        })
    return graphqueries
def _3_1_to_graphs(candidate_pathes, s1, t1):
    """Build '3_1' candidate graph queries of shape e-{p1}->m-{p2}->a.

    Every candidate is a tab-separated string with exactly four fields:
    ``p1 <TAB> m_entity <TAB> p2 <TAB> answer_entity``, for example
    ``user.tsegaran.random.taxonomy_subject.entry <TAB> m.04_8c54 <TAB>
    user.tsegaran.random.taxonomy_entry.taxonomy <TAB> m.04n6k``.
    Candidates with a different number of fields are ignored; the concrete
    mediator value is not kept.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: id used for the topic entity node
    :param t1: node type used for the topic entity node
    :return: list with one dict per distinct (p1, p2) pair; each dict has the
        keys "querytype", "nodes", "edges", "path" and "denotation", where
        "denotation" lists the answer entities seen for that pair
    """
    # The same three node objects are shared by every returned graph query.
    topic = GroundedNode(nid=1, node_type=t1, id=s1, type_class='',
                         friendly_name="", question_node=0)
    mediator = GroundedNode(nid=2, node_type="class", id='?m', type_class='',
                            friendly_name="", question_node=0)
    answer = GroundedNode(nid=3, node_type="class", id='?a', type_class='',
                          friendly_name="", question_node=1)

    # (p1, p2) -> set of answer entities
    answers_by_relations = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 4:
            rel_1, _mediator_id, rel_2, answer_id = fields
            answers_by_relations[(rel_1, rel_2)].add(answer_id)

    graphqueries = []
    for (rel_1, rel_2), answer_ids in answers_by_relations.items():
        graphqueries.append({
            "querytype": '3_1',
            "nodes": [topic, mediator, answer],
            "edges": [
                GroundedEdge(start=topic.nid, end=mediator.nid, relation=rel_1),
                GroundedEdge(start=mediator.nid, end=answer.nid, relation=rel_2),
            ],
            "path": '\t'.join((rel_1, rel_2)),
            "denotation": list(answer_ids),
        })
    return graphqueries
def add_path(_grounded_nodes, grounded_graph, path, son_id=None, father_id=None):
    """Add the nodes and edge(s) described by ``path`` to ``grounded_graph``.

    ``path`` is a tab-separated string. Its first field becomes the id of the
    son node and its last field the id of the father node. When the path has
    more than three fields it is treated as s->{p1}->mediator->{p2}->o: field 2
    is the mediator (CVT) id and fields 1 and 3 are the two relations.
    Otherwise field 1 is the single relation linking son and father.

    :param _grounded_nodes: nodes searched (by nid) for the son/father templates
        whose ``node_type`` and ``question_node`` are copied
    :param grounded_graph: graph object receiving ``add_node`` / ``add_edge`` calls
    :param path: tab-separated path string
    :param son_id: nid of the son node
    :param father_id: nid of the father node
    :return: None
    """
    son_template = search_one_node_in_nodes_by_nid(_grounded_nodes, son_id)
    father_template = search_one_node_in_nodes_by_nid(_grounded_nodes, father_id)
    parts = path.split('\t')

    # Build both end nodes before touching the graph.
    son = GroundedNode(nid=son_id, id=parts[0], score=1.0,
                       node_type=son_template.node_type,
                       question_node=son_template.question_node)
    father = GroundedNode(nid=father_id, id=parts[-1], score=1.0,
                          node_type=father_template.node_type,
                          question_node=father_template.question_node)
    grounded_graph.add_node(son)
    grounded_graph.add_node(father)

    if len(parts) <= 3:
        # Direct edge: son -{p}-> father
        grounded_graph.add_edge(
            GroundedEdge(start=son_id, end=father_id, relation=parts[1],
                         friendly_name=parts[1], score=1.0))
        return

    # Mediated path: the mediator nid is derived from the two end nids.
    mediator_nid = son_id * 10 + father_id
    grounded_graph.add_node(
        GroundedNode(nid=mediator_nid, id=parts[2], score=1,
                     node_type='class', question_node=0))
    first_hop = GroundedEdge(start=son_id, end=mediator_nid, relation=parts[1],
                             friendly_name=parts[1], score=1.0)
    second_hop = GroundedEdge(start=mediator_nid, end=father_id, relation=parts[3],
                              friendly_name=parts[3], score=1.0)
    grounded_graph.add_edge(first_hop)
    grounded_graph.add_edge(second_hop)
def read_graph_question_json(filename):
    """Read a GraphQuestions dataset file into a list of ``GraphQuestion`` objects.

    The file is parsed as JSON and iterated as a sequence of annotation
    records. For each record the scalar fields are copied, three ids are
    derived arithmetically from ``qid``, and the nodes/edges of
    ``graph_query`` are converted to ``GroundedNode`` / ``GroundedEdge``
    objects (all with score 1.0).

    :param filename: path of the JSON file
    :return: list of populated ``GraphQuestion`` objects, in file order
    """
    with open(filename, 'rb') as handle:
        annotations = json.load(handle)

    # Fields copied verbatim from the annotation, in this order.
    copied_fields = ("question", "answer", "function", "commonness",
                     "num_node", "num_edge", "graph_query")

    questions = list()
    for annotation in annotations:
        graphq = GraphQuestion()
        graphq.qid = annotation["qid"]
        qid = graphq.qid
        # Ids derived from the digits of qid.
        graphq.graph_entity_level_paraphrase_id = qid % 100
        graphq.graph_sentence_level_paraphrase_id = (qid // 100) % 10000
        graphq.graph_query_id = qid // 1000000
        for field in copied_fields:
            setattr(graphq, field, annotation[field])

        for node_json in annotation["graph_query"]["nodes"]:
            grounded_node = GroundedNode(
                nid=node_json["nid"],
                node_type=node_json["node_type"],
                type_class=node_json["class"],
                friendly_name=node_json["friendly_name"],
                question_node=node_json["question_node"],
                function=node_json["function"],
                id=node_json["id"],
                score=1.0)
            graphq.nodes.append(grounded_node)

        for edge_json in annotation["graph_query"]["edges"]:
            grounded_edge = GroundedEdge(
                start=edge_json["start"],
                end=edge_json["end"],
                relation=edge_json["relation"],
                friendly_name=edge_json["friendly_name"],
                score=1.0)
            graphq.edges.append(grounded_edge)

        graphq.sparql_query = annotation["sparql_query"]
        questions.append(graphq)
    return questions
def _ungrounded_graph_to_grounded_graph(ungrounded_graph, grounding_result_list):
    """Create a grounded graph from an ungrounded graph and node grounding results.

    :param ungrounded_graph: object exposing ``nodes`` and ``ungrounded_query_id``
    :param grounding_result_list: iterable of (ungrounded_node, nodes_grounding)
        pairs, where ``nodes_grounding`` is a mapping from id to score; may be None
    :return: an empty list when ``grounding_result_list`` is None, otherwise a
        list holding a single ``GrounedGraph`` with the grounded nodes and no edges
    """
    grouned_graph_list = []
    if grounding_result_list is None:
        return grouned_graph_list
    grounded_nodes = []
    # Copy every ungrounded node into a GroundedNode with an initial score of 0.
    for ungrounded_node in ungrounded_graph.nodes:
        grounded_nodes.append(GroundedNode(nid=ungrounded_node.nid, node_type=ungrounded_node.node_type, type_class=ungrounded_node.type_class, friendly_name=ungrounded_node.friendly_name, question_node=ungrounded_node.question_node, function=ungrounded_node.function, score=0))
    # Attach an id and score to each node whose nid has a grounding result.
    for grounded_node in grounded_nodes:
        for ungrounded_node, nodes_grounding in grounding_result_list:
            if grounded_node.nid == ungrounded_node.nid:
                """nodes_grounding: {'en.xtracycle':1.6, 'freebase.type_profile':1.0}"""
                for mid, pro in nodes_grounding.items():
                    grounded_node.id = mid
                    grounded_node.score = pro
                    # NOTE(review): the original layout of this function was lost;
                    # this break is presumed to stop after the first grounding
                    # entry - confirm against the upstream source.
                    break
    grounded_graph = GrounedGraph(grounded_query_id=ungrounded_graph.ungrounded_query_id, type='', nodes=grounded_nodes, edges=[], key_path='', sparql_query='', score=0, denotation='')
    grouned_graph_list.append(grounded_graph)
    return grouned_graph_list
def _ungrounded_to_grounded(ungrounded_graph):
    """Convert an ungrounded graph into a basic grounded graph.

    Nodes are copied field by field with a score of 0. Edges keep their
    start, end, friendly name and score. All remaining graph fields are
    left empty.

    :param ungrounded_graph: object exposing ``nodes``, ``edges`` and
        ``ungrounded_query_id``
    :return: a ``GrounedGraph`` built from the copied nodes and edges
    """
    grounded_nodes = [
        GroundedNode(nid=source.nid,
                     node_type=source.node_type,
                     type_class=source.type_class,
                     friendly_name=source.friendly_name,
                     question_node=source.question_node,
                     function=source.function,
                     score=0)
        for source in ungrounded_graph.nodes
    ]
    grounded_edges = [
        GroundedEdge(start=source.start,
                     end=source.end,
                     friendly_name=source.friendly_name,
                     score=source.score)
        for source in ungrounded_graph.edges
    ]
    return GrounedGraph(grounded_query_id=ungrounded_graph.ungrounded_query_id,
                        type='',
                        nodes=grounded_nodes,
                        edges=grounded_edges,
                        key_path='',
                        sparql_query='',
                        score=0,
                        denotation='')
def read_gold_graph_query(gold_grounded_graph_json):
    """Read a gold grounded graph from its JSON (dict) representation.

    NOTE(review): this name is defined twice in the visible source; if both
    definitions live in the same module, the later one wins at import time.

    :param gold_grounded_graph_json: mapping with "nodes" and "edges" lists,
        or None
    :return: None when the input is None, otherwise a ``GrounedGraph`` with
        query id -1 and type 'gold'
    """
    if gold_grounded_graph_json is None:
        return None

    def _class_of(node_json):
        # Prefer 'type_class', fall back to 'class', otherwise no class.
        for key in ('type_class', 'class'):
            if key in node_json:
                return node_json[key]
        return None

    gold_nodes = [
        GroundedNode(nid=node_json["nid"],
                     node_type=node_json["node_type"],
                     id=node_json["id"],
                     type_class=_class_of(node_json),
                     friendly_name=node_json["friendly_name"],
                     question_node=node_json["question_node"],
                     function=node_json["function"])
        for node_json in gold_grounded_graph_json['nodes']
    ]
    gold_edges = [
        GroundedEdge(start=edge_json["start"],
                     end=edge_json["end"],
                     relation=edge_json["relation"],
                     friendly_name=edge_json["friendly_name"])
        for edge_json in gold_grounded_graph_json["edges"]
    ]
    return GrounedGraph(-1, 'gold', gold_nodes, gold_edges)
def read_grounded_graph(grounded_graph_json):
    """Read a grounded graph from its JSON (dict) representation.

    "nodes" and "edges" are optional keys; when absent the corresponding
    list stays empty. "total_score" and "f1_score" are optional and default
    to 0.0. All other keys read below are required.

    :param grounded_graph_json: mapping describing one grounded graph
    :return: the reconstructed ``GrounedGraph``
    """
    query_id = grounded_graph_json['grounded_query_id']

    node_list = []
    if 'nodes' in grounded_graph_json:
        node_list = [
            GroundedNode(nid=node_json["nid"],
                         node_type=node_json["node_type"],
                         id=node_json["id"],
                         type_class=node_json['type_class'],
                         friendly_name=node_json["friendly_name"],
                         question_node=node_json["question_node"],
                         function=node_json["function"],
                         score=node_json['score'],
                         ordinal=node_json['ordinal'])
            for node_json in grounded_graph_json["nodes"]
        ]

    edge_list = []
    if 'edges' in grounded_graph_json:
        edge_list = [
            GroundedEdge(start=edge_json["start"],
                         end=edge_json["end"],
                         relation=edge_json["relation"],
                         friendly_name=edge_json["friendly_name"],
                         score=edge_json["score"])
            for edge_json in grounded_graph_json["edges"]
        ]

    graph_type = grounded_graph_json['type']
    key_path = grounded_graph_json['key_path']
    sparql_query = grounded_graph_json["sparql_query"]
    score = grounded_graph_json["score"]
    denotation = grounded_graph_json["denotation"]
    # Optional scores fall back to 0.0 when the key is missing.
    total_score = (grounded_graph_json['total_score']
                   if 'total_score' in grounded_graph_json else 0.0)
    f1_score = (grounded_graph_json['f1_score']
                if 'f1_score' in grounded_graph_json else 0.0)

    return GrounedGraph(query_id, graph_type, node_list, edge_list,
                        key_path=key_path,
                        sparql_query=sparql_query,
                        score=score,
                        denotation=denotation,
                        total_score=total_score,
                        f1_score=f1_score)
def _3_0_to_graphs(candidate_pathes, s1, t1):
    """Build '3_0' candidate graph queries of shape entity-{p}->o.

    Every candidate is a tab-separated string with exactly two fields: the
    first field is the relation p and the second is the object o, for example
    ``organization.organization.headquarters <TAB> m.08cshk7`` (for topic
    entity ns:m.0dhqrm). Candidates with a different number of fields are
    ignored.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: id used for the topic entity node
    :param t1: node type used for the topic entity node
    :return: list with one dict per distinct relation; each dict has the keys
        "querytype", "nodes", "edges", "path" and "denotation", where
        "denotation" lists the answer entities seen for that relation
    """
    # The same two node objects are shared by every returned graph query.
    topic = GroundedNode(nid=1, node_type=t1, id=s1, type_class='',
                         friendly_name="", question_node=0)
    answer = GroundedNode(nid=2, node_type="class", id='?a', type_class='',
                          friendly_name="", question_node=1)

    # relation -> set of answer entities
    answers_by_relation = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split("\t")
        if len(fields) == 2:
            relation, answer_id = fields
            answers_by_relation[relation].add(answer_id)

    graphqueries = []
    for relation, answer_ids in answers_by_relation.items():
        graphqueries.append({
            "querytype": '3_0',
            "nodes": [topic, answer],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=relation),
            ],
            "path": relation,
            "denotation": list(answer_ids),
        })
    return graphqueries
def read_gold_graph_query(gold_grounded_graph_json):
    """Read a gold grounded graph from its JSON (dict) representation.

    Only nodes and edges are read; the resulting graph gets query id -1 and
    type 'gold'. A node's class is taken from its 'type_class' key when
    present, else from its 'class' key, else it is None.

    :param gold_grounded_graph_json: mapping with "nodes" and "edges" lists,
        or None
    :return: None when the input is None, otherwise a ``GrounedGraph``
    """
    if gold_grounded_graph_json is None:
        return None

    parsed_nodes = []
    for raw_node in gold_grounded_graph_json['nodes']:
        if 'type_class' in raw_node:
            node_class = raw_node['type_class']
        else:
            # Falls back to 'class'; None when neither key exists.
            node_class = raw_node.get('class')
        parsed_nodes.append(GroundedNode(
            nid=raw_node["nid"],
            node_type=raw_node["node_type"],
            id=raw_node["id"],
            type_class=node_class,
            friendly_name=raw_node["friendly_name"],
            question_node=raw_node["question_node"],
            function=raw_node["function"]))

    parsed_edges = []
    for raw_edge in gold_grounded_graph_json["edges"]:
        parsed_edges.append(GroundedEdge(
            start=raw_edge["start"],
            end=raw_edge["end"],
            relation=raw_edge["relation"],
            friendly_name=raw_edge["friendly_name"]))

    return GrounedGraph(-1, 'gold', parsed_nodes, parsed_edges)
def _3_1_3_to_graphs(candidate_pathes, s1, t1):
    """Build '3_1_3' candidate graph queries.

    Shape: e-{p1}->m1-{p2}->a-{p3}->m2-{p4}->literal. Every candidate is a
    tab-separated string with exactly eight fields:
    ``p1, m1, p2, answer_entity, p3, m2, p4, literal``, for example
    ``location.location.contains <TAB> m.047tj <TAB>
    geography.island.body_of_water <TAB> m.05rgl <TAB>
    location.location.geolocation <TAB> m.05l1d9y <TAB>
    location.geocode.latitude <TAB> 0.0``.
    Candidates with a different number of fields are ignored; the concrete
    mediator values are not kept.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: id used for the topic entity node
    :param t1: node type used for the topic entity node
    :return: list with one dict per distinct (p1, p2, p3, p4) tuple; each dict
        has the keys "querytype", "nodes", "edges", "path" and "denotation",
        where "denotation" lists the (answer_entity, literal) pairs seen
    """
    # The same five node objects are shared by every returned graph query.
    topic = GroundedNode(nid=1, node_type=t1, id=s1, type_class='',
                         friendly_name="", question_node=0)
    mediator = GroundedNode(nid=2, node_type="class", id='?m', type_class='',
                            friendly_name="", question_node=0)
    answer = GroundedNode(nid=3, node_type="class", id='?a', type_class='',
                          friendly_name="", question_node=1)
    second_mediator = GroundedNode(nid=4, node_type="class", id='?c',
                                   type_class='', friendly_name="",
                                   question_node=0)
    literal = GroundedNode(nid=5, node_type="literal", id='?literal',
                           type_class='', friendly_name="", question_node=0)

    # (p1, p2, p3, p4) -> set of (answer_entity, literal) pairs
    answers_by_relations = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split('\t')
        if len(fields) == 8:
            rel_1, _m1, rel_2, answer_id, rel_3, _m2, rel_4, literal_value = fields
            answers_by_relations[(rel_1, rel_2, rel_3, rel_4)].add(
                (answer_id, literal_value))

    graphqueries = []
    for relations, pairs in answers_by_relations.items():
        rel_1, rel_2, rel_3, rel_4 = relations
        graphqueries.append({
            "querytype": '3_1_3',
            "nodes": [topic, mediator, answer, second_mediator, literal],
            "edges": [
                GroundedEdge(start=topic.nid, end=mediator.nid, relation=rel_1),
                GroundedEdge(start=mediator.nid, end=answer.nid, relation=rel_2),
                GroundedEdge(start=answer.nid, end=second_mediator.nid,
                             relation=rel_3),
                GroundedEdge(start=second_mediator.nid, end=literal.nid,
                             relation=rel_4),
            ],
            "path": '\t'.join(relations),
            "denotation": list(pairs),
        })
    return graphqueries
def _3_0_2_to_graphs(candidate_pathes, s1, t1):
    """Build '3_0_2' candidate graph queries.

    Shape: e-{p1}->*a-{p2}->*m-{p3}->literal. Every candidate is a
    tab-separated string with exactly six fields:
    ``p1, answer_entity, p2, m_entity, p3, literal``, for example
    ``location.location.contains <TAB> m.06s9y <TAB>
    location.statistical_region.gdp_real <TAB> m.0hnzhpd <TAB>
    measurement_unit.adjusted_money_value.adjusted_value <TAB> 192244189.0``.
    Candidates with a different number of fields are ignored; the concrete
    mediator value is not kept.

    :param candidate_pathes: iterable of tab-separated path strings
    :param s1: id used for the topic entity node
    :param t1: node type used for the topic entity node
    :return: list with one dict per distinct (p1, p2, p3) tuple; each dict has
        the keys "querytype", "nodes", "edges", "path" and "denotation", where
        "denotation" lists the (answer_entity, literal) pairs seen
    """
    # The same four node objects are shared by every returned graph query.
    topic = GroundedNode(nid=1, node_type=t1, id=s1, type_class='',
                         friendly_name="", question_node=0)
    answer = GroundedNode(nid=2, node_type="class", id='?a', type_class='',
                          friendly_name="", question_node=1)
    mediator = GroundedNode(nid=3, node_type="class", id='?m', type_class='',
                            friendly_name="", question_node=0)
    literal = GroundedNode(nid=4, node_type="literal", id='?literal',
                           type_class='', friendly_name="", question_node=0)

    # (p1, p2, p3) -> set of (answer_entity, literal) pairs
    answers_by_relations = collections.defaultdict(set)
    for raw_path in candidate_pathes:
        fields = raw_path.split('\t')
        if len(fields) == 6:
            rel_1, answer_id, rel_2, _mediator_id, rel_3, literal_value = fields
            answers_by_relations[(rel_1, rel_2, rel_3)].add(
                (answer_id, literal_value))

    graphqueries = []
    for relations, pairs in answers_by_relations.items():
        rel_1, rel_2, rel_3 = relations
        graphqueries.append({
            "querytype": '3_0_2',
            "nodes": [topic, answer, mediator, literal],
            "edges": [
                GroundedEdge(start=topic.nid, end=answer.nid, relation=rel_1),
                GroundedEdge(start=answer.nid, end=mediator.nid, relation=rel_2),
                GroundedEdge(start=mediator.nid, end=literal.nid, relation=rel_3),
            ],
            "path": '\t'.join(relations),
            "denotation": list(pairs),
        })
    return graphqueries
def extract_grounded_graph_from_jena_freebase(file_path):
    """Extract one grounded graph per question id from a Jena BGP dump (Freebase).

    The file is read line by line via ``read_list_yuanshi``. Recognised lines:

    * ``#QID...``        - starts a new question; the third tab-separated
      field is the question id, and collected triples/nodes are reset
    * ``#2.2_triple...`` - the fields after the first form one triple
    * ``#2.2_node...``   - a node description line
    * ``-------------------...`` - separator; the nodes and triples collected
      so far are turned into a graph

    Example usage (from the original notes):
        file path = './2019.04_15_complexwebq_test_bgp.txt'
        for qid, grounded_graph in qid_to_grounded_graph_dict.items():
            print(qid, grounded_graph)

    :param file_path: path of the BGP text file
    :return: dict mapping question id to ``GrounedGraph``; questions without
        any node are not included
    """
    graphs_by_qid = dict()
    pending_triples = []
    pending_node_lines = []
    current_qid = None

    for line in read_list_yuanshi(file_path):
        if line.startswith('#QID'):
            current_qid = line.split('\t')[2]
            pending_triples = []
            pending_node_lines = []
        elif line.startswith('#2.2_triple'):
            pending_triples.append(line.split('\t')[1:])
        elif line.startswith('#2.2_node'):
            pending_node_lines.append(line)

        if not line.startswith('-------------------'):
            continue

        # Separator reached: materialise the nodes collected for this question.
        nodes = []
        for node_line in pending_node_lines:
            cols = node_line.split('\t')
            node_id = cols[1]
            node = GroundedNode(id=node_id, nid=node_id)
            if node.id == '?x':
                node.question_node = 1

            if node_id.startswith('?'):
                node.node_type = 'class'
            elif node_id.startswith(('m.', 'g.', 'en.')):
                node.node_type = 'entity'
            else:
                node.node_type = 'literal'

            # The last column holds a Python literal, e.g.
            #   #2.2_node: m.03_dwn {'Lou Seal'}
            #   #2.2_node: ?x False {'m.0117q3yz': {'common.topic', 'time.event', ...}}
            # NOTE(review): eval() executes arbitrary code from the input file;
            # only use this on trusted files (ast.literal_eval may be a safer
            # substitute - confirm the data stays within plain literals first).
            if len(cols) in (3, 4):
                node.friendly_name = eval(cols[-1])
            nodes.append(node)

        edges = []
        for triple in pending_triples:
            start_node = _get_node_by_id(nodes, triple[0])
            end_node = _get_node_by_id(nodes, triple[2])
            relation = triple[1]
            if relation == 'common.topic.notable_types':
                # The object of a notable_types triple is marked as a class;
                # the edge itself is kept only when that object is a variable.
                end_node.node_type = 'class'
                if not end_node.nid.startswith('?'):
                    continue
            edges.append(
                GroundedEdge(start=start_node.nid, end=end_node.nid,
                             relation=relation, friendly_name=relation))

        if nodes:
            graphs_by_qid[current_qid] = GrounedGraph(nodes=nodes, edges=edges)
    return graphs_by_qid
def extract_grounded_graph_from_jena_dbpedia(file_path):
    """Extract one grounded graph per question id from a Jena BGP dump (DBpedia).

    The file is read line by line via ``read_list_yuanshi``. Recognised lines:

    * ``#Qid...``        - starts a new question; the third tab-separated
      field is the question id, and collected triples/nodes are reset
    * ``#Question...``   - question text (parsed but not used further here)
    * ``#2.2_triple...`` - the fields after the first form one triple
    * ``#2.2_node...``   - a node line; its second field is the node id
    * ``-------------------...`` - separator; the nodes and triples collected
      so far are turned into a graph stored under the current question id

    Example usage (from the original notes):
        file path = './2019.07.17_qald_test_bgp.txt'
        for qid, grounded_graph in qid_to_grounded_graph_dict.items():
            print(qid, grounded_graph)

    :param file_path: path of the BGP text file
    :return: dict mapping question id to ``GrounedGraph``
    """
    graphs_by_qid = dict()
    pending_triples = []
    pending_node_lines = []
    current_qid = None
    question = None

    for line in read_list_yuanshi(file_path):
        if line.startswith('#Qid'):
            current_qid = line.split('\t')[2]
            pending_triples = []
            pending_node_lines = []
        elif line.startswith('#Question'):
            question_cols = line.split('\t')
            question = question_cols[1] if len(question_cols) == 2 else ''
        elif line.startswith('#2.2_triple'):
            pending_triples.append(line.split('\t')[1:])
        elif line.startswith('#2.2_node'):
            pending_node_lines.append(line)

        if not line.startswith('-------------------'):
            continue

        # Separator reached: materialise the nodes collected for this question.
        nodes = []
        for node_line in pending_node_lines:
            node_id = node_line.split('\t')[1]
            node = GroundedNode(id=node_id, nid=node_id)
            if node.id == '?uri':
                # '?uri' is the question node
                node.question_node = 1
            # DBpedia resources are entities; everything else (e.g. ?x) is a class.
            is_resource = node_id.startswith('http://dbpedia.org/resource/')
            node.node_type = 'entity' if is_resource else 'class'
            nodes.append(node)

        edges = []
        for triple in pending_triples:
            start_node = _get_node_by_id(nodes, triple[0])
            end_node = _get_node_by_id(nodes, triple[2])
            relation = triple[1]
            # rdf:type triples are not turned into edges (added 2020.04.24).
            if relation != 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type':
                edges.append(
                    GroundedEdge(start=start_node.nid, end=end_node.nid,
                                 relation=relation, friendly_name=relation))

        graphs_by_qid[current_qid] = GrounedGraph(nodes=nodes, edges=edges)
    return graphs_by_qid