def run_grounding_graph_path_match(input_file_folder):
    """Score every grounded-graph candidate by lexical path match.

    For each structure file in *input_file_folder*: extract the important
    words of the question, score each candidate's key path with
    PathMatchByLexicalNN, store it on ``candidate.score``, and rewrite the
    file in place.
    """
    import os
    from grounding.ranking.path_match_nn.path_match_interface import PathMatchByLexicalNN
    from parsing.parsing_utils import extract_importantwords_from_question
    matcher = PathMatchByLexicalNN()
    for filename in os.listdir(input_file_folder):
        print(filename)
        file_path = input_file_folder + filename
        structures = read_structure_file(file_path)
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                words = extract_importantwords_from_question(
                    question=structure.question,
                    ungrounded_graph=ungrounded_graph)
                print(words, len(ungrounded_graph.get_grounded_graph_forest()))
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    candidate.score = matcher.get_path_pro(candidate.key_path, words)
                    print(candidate.key_path, words, candidate.score)
        write_structure_file(structures, file_path)
def run_grounding_graph_question_match_minus(input_file_folder):
    """Subtract the normalized word-match score from each candidate's total score.

    Two passes per structure file: first collect every candidate score and
    normalize them with ``utils.Normalize``; then set
    ``total_score = total_score - normalized_score`` for each candidate and
    rewrite the file in place.
    """
    import os
    from common import utils
    for filename in os.listdir(input_file_folder):
        print(filename)
        file_path = input_file_folder + filename
        structures = read_structure_file(file_path)
        # Pass 1: gather all raw candidate scores in file order.
        raw_scores = []
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    raw_scores.append(candidate.score)
        normalized = utils.Normalize(raw_scores)
        # Raw score -> normalized score (later duplicates overwrite earlier
        # ones, exactly as the original enumerate-based fill did).
        score_map = {}
        for raw, norm in zip(raw_scores, normalized):
            score_map[raw] = norm
        # Pass 2: apply the subtraction.
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    candidate.total_score = candidate.total_score - score_map[candidate.score]
        write_structure_file(structures, file_path)
def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file, output_file):
    """2.1 step: 1.0 ungrounded query -> 2.1 grounded query (Freebase linking).

    Only the *last* ungrounded graph of each structure is processed: every
    node is entity-linked against Freebase, 2.1 grounded graphs are generated
    from the linking results, and the updated structures are written to
    *output_file*.
    """
    from grounding._2_1_grounded_graph import node_linking_interface_freebase
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        for i, ungrounded_graph in enumerate(forest):
            if i != len(forest) - 1:
                continue  # skip all but the final ungrounded graph
            linking_results = [
                (node, node_linking_interface_freebase.node_linking(qid=structure.qid, node=node))
                for node in ungrounded_graph.nodes
            ]
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=linking_results)
            ungrounded_graph.set_grounded_linking(linking_results)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structure_list, output_file)
def computed_every_grounded_graph_f1_cwq(input_file):
    """Compute recall/precision/F1 for every CWQ candidate graph.

    Gold answers come from the ComplexWebQuestions interface, keyed by
    question text. Each structure file under *input_file* is updated in
    place; files that raise are skipped, collected, and reported at the end.

    :param input_file: folder of structure files (path must end with a separator).
    """
    from datasets_interface.question_interface import complexwebquestion_interface
    all_structure_path = os.listdir(input_file)
    error_list = []
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        print(structure_path)
        try:
            structure_list = read_structure_file(structure_with_grounded_graphq_file)
            for structure in structure_list:
                gold_answer_mid_set = complexwebquestion_interface.get_answers_by_question(
                    structure.question)
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                        system_denotation_set = set(grounded_graph.denotation)
                        recall, precision, f1 = sempre_evaluation.computeF1(
                            gold_answer_mid_set, system_denotation_set)
                        grounded_graph.f1_score = f1
                        grounded_graph.recall_score = recall
                        grounded_graph.precision_score = precision
            write_structure_file(structure_list, input_file + structure_path)
        except Exception as e:
            # Fix: report which file failed and why, instead of a bare 'error'
            # that discarded all diagnostic information.
            print('error:', structure_path, e)
            error_list.append(structure_path)
    print('error_list:\t', error_list)
def computed_every_grounded_graph_f1_graphq(input_file):
    """Evaluate every GraphQuestions candidate graph against gold answer mids.

    Gold answers are looked up by question text; integer system answers are
    stringified before comparison. Each file is rewritten in place and each
    structure's ``gold_answer`` is replaced by the gold mid list.
    """
    from datasets_interface.question_interface import graphquestion_interface
    for structure_path in os.listdir(input_file):
        file_path = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(file_path)
        for structure in structure_list:
            gold_answers_mid_set = graphquestion_interface.get_answers_mid_by_question(
                structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # Stringify integer answers so both sides compare as text.
                    system_answers = [
                        str(answer) if isinstance(answer, int) else answer
                        for answer in set(grounded_graph.denotation)
                    ]
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers_mid_set, system_answers)
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
                    if f1 > 0:
                        # print(structure_path, gold_answers_mid_set, system_answers, f1)
                        print(structure_path, f1)
            # update answers by answer mid list ["Kimberly-Clark"] ['en.kimberly-clark']
            structure.gold_answer = gold_answers_mid_set
        write_structure_file(structure_list, file_path)
def run_grounded_node_grounding_dbpedia_gold(structure_with_ungrounded_graphq_file, output_file):
    """2.1 step: 1.0 ungrounded query -> 2.1 grounded query (gold LC-QuAD linking).

    Only the final ungrounded graph of each structure is processed: each node
    mention is resolved through the LC-QuAD 1.0 gold topic-entity lookup,
    2.1 grounded graphs are generated, and the structures are written to
    *output_file*.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        for i, ungrounded_graph in enumerate(forest):
            if i != len(forest) - 1:
                continue
            # e.g. (node(barbaro), {'en.barbaro': 1.6})
            linking_results = [
                (node,
                 lcquad_1_0_interface.get_topic_entities_list_by_question_and_nodemention(
                     question=structure.question,
                     mention=node.friendly_name))
                for node in ungrounded_graph.nodes
            ]
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=linking_results)
            ungrounded_graph.set_grounded_linking(linking_results)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structure_list, output_file)
def run_grounded_graph_generation_by_structure_transformation(
        structure_with_grounded_graphq_node_grounding_file, output_file):
    """2.2 step: expand every 2.1 grounded graph into candidate 2.2 graphs.

    Each structure is written to ``output_file + '<qid>.json'`` as soon as its
    candidates exist, so an interrupted run can resume: qids whose output file
    already exists are skipped.

    :param structure_with_grounded_graphq_node_grounding_file: input structure file.
    :param output_file: output *folder* path (must end with a separator).
    """
    from grounding._2_2_grounded_graph_offline import graph_2_1_to_2_2_by_transfer
    from grounding.grounded_graph_to_sparql import grounded_graph_to_sparql_CWQ

    def count_denotation_to_num(grounded_graph):
        """Replace a denotation by its size, e.g. 'how many softwares are developed by google?'."""
        denotation_set = grounded_graph.denotation
        num = len(denotation_set) if denotation_set is not None else 0
        return [num]

    structure_list = read_structure_file(
        structure_with_grounded_graphq_node_grounding_file)
    # Perf fix: the original called os.listdir(output_file) once per structure
    # (O(n^2) directory scans). Scan once and track newly written files.
    done_files = set(os.listdir(output_file))
    error_qid_list = []
    for i, structure in enumerate(structure_list):
        out_name = str(structure.qid) + '.json'
        if out_name in done_files:
            continue
        print(i, structure.qid, structure.question)
        is_print = False
        for ungrounded_graph in structure.ungrounded_graph_forest:
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    grounded_graph_forest.extend(
                        graph_2_1_to_2_2_by_transfer.
                        generate_candidates_by_2_1_grounded_graph_interface(
                            _2_1_grounded_graph=_2_1_grounded_graph))
                except Exception as e:
                    print('#Error:', structure.qid, e)
                    error_qid_list.append(structure.qid)
                    # break
            if len(grounded_graph_forest) > 0:
                is_print = True
                print('#Size:', len(grounded_graph_forest))
            for z in range(len(grounded_graph_forest)):
                # Candidate ids are namespaced under the parent ungrounded query.
                grounded_graph_forest[z].grounded_query_id = \
                    ungrounded_graph.ungrounded_query_id * 100000 + z
                grounded_graph_forest[z].sparql_query = grounded_graph_to_sparql_CWQ(
                    grounded_graph_forest[z])
                if structure.function == 'count':
                    grounded_graph_forest[z].denotation = count_denotation_to_num(
                        grounded_graph_forest[z])
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
        if is_print:
            write_structure_file([structure], output_file + out_name)
            done_files.add(out_name)
    print('Error qid list:', error_qid_list)
def run_ungrounded_graph_from_graphq(graph_questions_filepath, output_file):
    """Build ungrounded query graphs for every GraphQuestions entry and save them."""
    from datasets_interface.question_interface import graphquestion_interface
    questions = graphquestion_interface.read_graph_question_json(graph_questions_filepath)
    tuples_list = [
        (q.qid, q.question, q.graph_query, q.answer)
        for q in questions
    ]
    structure_list = running_interface.run_query_graph_generation(tuples_list=tuples_list)
    write_structure_file(structure_list, output_file)
def computed_every_grounded_graph_f1_webq_name(input_file, answer_file, mid_to_names_file):
    """Compute F1 for WebQuestions candidates by comparing answer *names* (not mids).

    :param input_file: folder of structure files; each is rewritten in place.
    :param answer_file: TSV where column 0 is a qid and column 2 is a Python
        literal of gold answer names.
    :param mid_to_names_file: TSV where column 1 is a mid and column 2 is a
        Python literal of that mid's names.
    """
    # from datasets_interface.freebase import webquestions_interface
    # from evaluation.webq_denotation import webq_mid_to_names_process
    #------------------------------------------------
    #read qid-to-answers
    qid_to_answers_dict = dict()
    lines = read_list(answer_file)
    for line in lines:
        cols = line.split('\t')
        # NOTE(review): eval() on file content — acceptable only for trusted
        # local files; never run this on untrusted input.
        qid_to_answers_dict[cols[0]] = eval(cols[2])
    #------------------------------------------------
    # mid to names dict
    mid_to_names_dict = dict()
    lines = read_list(mid_to_names_file)
    for line in lines:
        cols = line.split('\t')
        mid = cols[1]
        names = list(eval(cols[2]))
        mid_to_names_dict[mid] = names
    #------------------------------------------------
    all_structure_path = os.listdir(input_file)
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        structure_list = read_structure_file(
            structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_names_set = evaluation_utils.search_for_answers_by_id(
                qid, qid_to_answers_dict)
            print(structure_path, '#gold:\t', gold_answer_names_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # Map each denotation mid to a human-readable name; mids
                    # with no known name are reported and excluded.
                    system_denotation_names_set = set()
                    for denotation_mid in grounded_graph.denotation:
                        denotation_name = evaluation_utils.get_name_by_mid(
                            denotation_mid, mid_to_names_dict)
                        print('###denotation:\t', denotation_mid, denotation_name)
                        if denotation_name is not None:
                            system_denotation_names_set.add(denotation_name)
                        else:
                            print(denotation_mid, '#####error!!!', denotation_name)
                    print('#gold:\t', gold_answer_names_set, '#system:\t',
                          system_denotation_names_set)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_names_set, system_denotation_names_set)
                    if f1 > 0.0:
                        print('#result:\t', f1)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, input_file + structure_path)
def run_ungrounded_graph_from_complexwebquestion(
        complexquestin_filepath, structure_with_1_ungrounded_cwq_file):
    """Build ungrounded query graphs for every ComplexWebQuestions entry and save them."""
    from datasets_interface.question_interface import complexwebquestion_interface
    from common.hand_files import write_structure_file
    cwq_structs = complexwebquestion_interface.read_complexwebq_question_json(
        complexquestin_filepath)
    tuples_list = [
        (cwq.ID, cwq.question, cwq.sparql, cwq.answers)
        for cwq in cwq_structs
    ]
    print(len(tuples_list))
    structure_list = running_interface.run_query_graph_generation(tuples_list=tuples_list)
    write_structure_file(structure_list, structure_with_1_ungrounded_cwq_file)
def computed_every_grounded_graph_f1_graphq(input_file):
    """Evaluate GraphQuestions candidates against qid-indexed gold answer mids.

    NOTE: this redefines ``computed_every_grounded_graph_f1_graphq`` — at
    module import time the later definition wins. Gold answers come from the
    train/test qid->answer-mid dictionaries; integers on both sides are
    stringified before comparison. Files are rewritten in place and each
    structure's ``gold_answer`` is replaced by the normalized gold list.
    """
    from grounding.grounding_args import test_qid_to_answers_mid_dict, train_qid_to_answers_mid_dict
    for structure_path in os.listdir(input_file):  # all_structure_path
        file_path = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(file_path)
        for structure in structure_list:
            qid = structure.qid
            if qid in test_qid_to_answers_mid_dict:
                gold_answers = test_qid_to_answers_mid_dict[qid]
            elif qid in train_qid_to_answers_mid_dict:
                gold_answers = train_qid_to_answers_mid_dict[qid]
            else:
                gold_answers = []
            # [80] -> ['80']: stringify ints and dedupe.
            gold_answers = list({
                str(answer) if isinstance(answer, int) else answer
                for answer in gold_answers
            })
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_answers = list({
                        str(answer) if isinstance(answer, int) else answer
                        for answer in set(grounded_graph.denotation)
                    })
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, system_answers)
                    print(structure_path, gold_answers, system_answers, f1)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
            # update answers by answer mid list ["Kimberly-Clark"] ['en.kimberly-clark']
            structure.gold_answer = gold_answers
        write_structure_file(structure_list, file_path)
def run_topic_entities_from_lcquad(filepath, structure_with_1_ungrounded_lcquad_file,
                                   node_is_gold=False, linking_is_gold=False):
    """Run DBpedia topic-entity generation for every LC-QuAD entry and save the structures."""
    from datasets_interface.question_interface import lcquad_1_0_interface
    entries = lcquad_1_0_interface.read_train_test_data(filepath)
    tuples_list = [
        (entry.qid, entry.question_normal, entry.sparql, None)
        for entry in entries
    ]
    structure_list = ir_module.run_topics_entity_generation_dbpedia(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='lcquad')
    write_structure_file(structure_list, structure_with_1_ungrounded_lcquad_file)
def run_topic_entities_from_cwq(filepath, structure_with_1_ungrounded_cwq_file,
                                node_is_gold=False, linking_is_gold=False):
    """Run Freebase topic-entity generation for every ComplexWebQuestions entry and save the structures."""
    from datasets_interface.question_interface import complexwebquestion_interface
    entries = complexwebquestion_interface.read_complexwebq_question_json(filepath)
    tuples_list = [
        (entry.ID, entry.question, entry.sparql, entry.answers)
        for entry in entries
    ]
    structure_list = ir_module.run_topics_entity_generation_freebase(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='cwq')
    write_structure_file(structure_list, structure_with_1_ungrounded_cwq_file)
def run_ungrounded_graph_from_lcquad(filepath, structure_with_1_ungrounded_lcquad_file,
                                     node_is_gold=False, parser_mode='skeleton'):
    """Build ungrounded query graphs for every LC-QuAD entry (SP pipeline) and save them."""
    from datasets_interface.question_interface import lcquad_1_0_interface
    entries = lcquad_1_0_interface.read_train_test_data(filepath)
    tuples_list = [
        (entry.qid, entry.question_normal, entry.sparql, None)
        for entry in entries
    ]
    print(len(tuples_list))
    structure_list = sp_modules.run_query_graph_generation(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='lcquad')
    write_structure_file(structure_list, structure_with_1_ungrounded_lcquad_file)
def run_ungrounded_graph_from_graphq(graph_questions_filepath, output_file,
                                     node_is_gold=False, parser_mode='skeleton'):
    """Build ungrounded query graphs for every GraphQuestions entry (SP pipeline) and save them.

    NOTE: this redefines ``run_ungrounded_graph_from_graphq``; at import time
    this later definition wins.
    """
    from datasets_interface.question_interface import graphquestion_interface
    entries = graphquestion_interface.read_graph_question_json(graph_questions_filepath)
    tuples_list = [
        (entry.qid, entry.question, entry.graph_query, entry.answer)
        for entry in entries
    ]
    print(len(tuples_list))
    structure_list = sp_modules.run_query_graph_generation(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='graphq')
    write_structure_file(structure_list, output_file)
def run_grounding_graph_add_question_match(input_file_folder):
    """Add the question-match score to each candidate's word-match score.

    Sets ``total_score = score + QuestionMatchInterface(qid, denotation)`` for
    every candidate and rewrites each structure file in place.
    """
    from grounding.ranking.path_match_sentence_level.question_match_interface import QuestionMatchInterface
    matcher = QuestionMatchInterface()
    for filename in os.listdir(input_file_folder):
        print(filename)
        file_path = input_file_folder + filename
        structures = read_structure_file(file_path)
        for structure in structures:
            question_id = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    candidate.total_score = candidate.score + matcher.get_score(
                        question_id, candidate.denotation)
        write_structure_file(structures, file_path)
def run_grounding_graph_guiyi_add_question_match(input_file_folder):
    """Score every candidate with the question-match model alone.

    Sets ``grounded_graph.score = qmi.get_score(qid, denotation)`` for each
    candidate and rewrites every structure file in *input_file_folder* in
    place, printing positive scores as they are found.

    Fix: the original also min-max-normalized all scores into a dict here,
    but the normalized values were consumed only by commented-out code, so
    that dead first pass (and the unused ``common.utils`` import) is removed.
    """
    import os
    from grounding.ranking.path_match_nn.question_match_interface import QuestionMatchInterface
    qmi = QuestionMatchInterface()
    for path in os.listdir(input_file_folder):
        print(path)
        structure_with_grounded_graphq_file = input_file_folder + path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    grounded_graph.score = qmi.get_score(qid, grounded_graph.denotation)
                    if grounded_graph.score > 0:
                        print('\t\t', grounded_graph.score)
        write_structure_file(structure_list, structure_with_grounded_graphq_file)
def run_candidate_graph_generation(structure_with_1_ungrounded_lcquad_file, output_file, q_mode='lcquad'):
    """Generate candidate 2.2 grounded graphs for the last ungrounded graph of each structure.

    *q_mode* selects the dataset-specific oracle-graph generator ('lcquad',
    'graphq' or 'cwq'). Each structure with a non-empty candidate forest is
    written to ``output_file + '<qid>.json'``; qids whose output file already
    exists are skipped so interrupted runs can resume.

    :param structure_with_1_ungrounded_lcquad_file: input structure file.
    :param output_file: output *folder* path (must end with a separator).
    """
    from method_ir.grounding import graph_2_1_to_2_2_ir
    from method_sp.grounding import grounded_graph_to_sparql
    from method_sp.grounding import sparql_to_denotation
    import os
    structure_list = read_structure_file(structure_with_1_ungrounded_lcquad_file)
    # Perf fix: the original re-listed the output directory once per structure
    # (O(n^2) scans). List once and track newly written files in a set.
    done_files = set(os.listdir(output_file))
    error_qid_list = []
    for _, structure in enumerate(structure_list):
        out_name = str(structure.qid) + '.json'
        if out_name in done_files:
            continue
        print(structure.qid)
        compositionality_type = structure.compositionality_type
        for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
            if j != len(structure.ungrounded_graph_forest) - 1:
                continue  # only the final ungrounded graph is expanded
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    if q_mode == 'graphq':
                        grounded_graph_forest.extend(
                            graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_graphq(
                                _2_1_grounded_graph=_2_1_grounded_graph,
                                qtype=compositionality_type))
                    elif q_mode == 'cwq':
                        grounded_graph_forest.extend(
                            graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_cwq(
                                _2_1_grounded_graph=_2_1_grounded_graph,
                                qtype=compositionality_type))
                    elif q_mode == 'lcquad':
                        grounded_graph_forest.extend(
                            graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_lcquad(
                                _2_1_grounded_graph=_2_1_grounded_graph,
                                qtype=compositionality_type))
                except Exception as e:
                    # On any generator failure, drop all candidates for this
                    # structure and move on (original behavior).
                    print('#Error:', structure.qid, e)
                    grounded_graph_forest.clear()
                    error_qid_list.append(structure.qid)
                    break
            for z in range(len(grounded_graph_forest)):
                # Candidate ids are namespaced under the parent ungrounded query.
                grounded_graph_forest[z].grounded_query_id = \
                    ungrounded_graph.ungrounded_query_id * 100000 + z
                grounded_graph_forest[z].sparql_query = \
                    grounded_graph_to_sparql.grounded_graph_to_sparql(
                        grounded_graph=grounded_graph_forest[z],
                        q_function=structure.function,
                        q_compositionality_type=structure.compositionality_type,
                        q_mode=q_mode)
                grounded_graph_forest[z].denotation = sparql_to_denotation.set_denotation(
                    grounded_graph=grounded_graph_forest[z],
                    q_compositionality_type=structure.compositionality_type)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
            print('#size:\t', len(grounded_graph_forest))
            if len(grounded_graph_forest) > 0:
                write_structure_file([structure], output_file + out_name)
                done_files.add(out_name)
    print('Error qid list:', error_qid_list)
def computed_every_grounded_graph_f1_webq_mid(input_file, answer_file):
    """Compute F1 over answer *mids* for every WebQuestions candidate graph.

    NOTE(review): ``qid_to_answers_dict`` is never defined in this function
    even though the comment says "read qid-to-answers", and ``answer_file``
    is never used. This raises NameError unless a module-level
    ``qid_to_answers_dict`` exists — confirm, and either load the dict from
    ``answer_file`` (as ``computed_every_grounded_graph_f1_webq_name`` does)
    or drop the parameter.
    """
    #read qid-to-answers
    all_structure_path = os.listdir(input_file)
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        structure_list = read_structure_file(
            structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_mid_set = evaluation_utils.search_for_answers_by_id(
                qid, qid_to_answers_dict)
            print(structure_path, gold_answer_mid_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_set = set(grounded_graph.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, input_file + structure_path)
def run_topic_entities_from_graphq(graph_questions_filepath, structure_with_1_ungrounded_graphq_file,
                                   node_is_gold=False, linking_is_gold=False):
    """Run Freebase topic-entity generation for every GraphQuestions entry and save the structures."""
    from datasets_interface.question_interface import graphquestion_interface
    questions = graphquestion_interface.read_graph_question_json(graph_questions_filepath)
    tuples_list = [
        (q.qid, q.question, q.graph_query, q.answer_mid)
        for q in questions
    ]
    print(len(tuples_list))
    structure_list = ir_module.run_topics_entity_generation_freebase(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        linking_is_gold=linking_is_gold,
        q_mode='graphq')
    write_structure_file(structure_list, structure_with_1_ungrounded_graphq_file)
def computed_every_grounded_graph_f1_cwq(input_file):
    """Evaluate CWQ candidates against gold answers stored on each structure.

    NOTE: this redefines ``computed_every_grounded_graph_f1_cwq``; at import
    time this later definition wins. Only ``f1_score`` is stored; positive
    scores are printed. Files are rewritten in place.
    """
    for structure_path in os.listdir(input_file):
        file_path = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(file_path)
        for structure in structure_list:
            gold_answer_mid_set = evaluation_utils.get_gold_answers(structure.gold_answer)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_mid_set, set(grounded_graph.denotation))
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, file_path)
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Evaluate LC-QuAD candidates; stores recall, precision and F1 on each graph.

    Gold answers are looked up by question text, e.g.
    ['http://dbpedia.org/resource/Colorado']. Files are rewritten in place.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    for structure_path in os.listdir(input_file):
        file_path = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(file_path)
        for structure in structure_list:
            gold_answers = lcquad_1_0_interface.get_answers_by_question(structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, set(grounded_graph.denotation))
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
        write_structure_file(structure_list, file_path)
def run_ungrounded_graph_from_complexwebquestion(
        complexquestin_filepath, structure_with_1_ungrounded_cwq_file,
        node_is_gold=False, parser_mode='skeleton'):
    """Build ungrounded query graphs for every ComplexWebQuestions entry (SP pipeline).

    NOTE: this redefines ``run_ungrounded_graph_from_complexwebquestion``; at
    import time this later definition wins.
    """
    from datasets_interface.question_interface import complexwebquestion_interface
    entries = complexwebquestion_interface.read_complexwebq_question_json(
        complexquestin_filepath)
    tuples_list = [
        (entry.ID, entry.question, entry.sparql, entry.answers)
        for entry in entries
    ]
    print(len(tuples_list))
    structure_list = sp_modules.run_query_graph_generation(
        tuples_list=tuples_list,
        node_is_gold=node_is_gold,
        parser_mode=parser_mode,
        q_mode='cwq')
    write_structure_file(structure_list, structure_with_1_ungrounded_cwq_file)
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Evaluate LC-QuAD candidates; stores only F1 and prints positive scores.

    NOTE: this redefines ``computed_every_grounded_graph_f1_lcquad``; at
    import time this later definition wins. Files are rewritten in place.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    for structure_path in os.listdir(input_file):
        file_path = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(file_path)
        for structure in structure_list:
            gold_answers = lcquad_1_0_interface.get_answers_by_question(structure.question)
            print('#gold answer:\t', gold_answers)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, set(grounded_graph.denotation))
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, file_path)
def run_grounding_graph_score12_match(input_file_folder, q_mode='lcquad'):
    """Score the last ungrounded graph's candidates with the BERT score12 model.

    On scoring failure the whole candidate list falls back to 0.0 scores
    (best-effort, original behavior). Files are rewritten in place.

    :param input_file_folder: folder of structure files (path must end with a separator).
    :param q_mode: dataset mode forwarded to PathMatchScore12.
    """
    from method_ir.grounding.path_match_score12.path_match_interface import PathMatchScore12
    path_match_score12 = PathMatchScore12(q_mode)
    for path in os.listdir(input_file_folder):
        structure_with_grounded_graphq_file = input_file_folder + path
        print(path)
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            question = structure.question
            for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
                if j != len(structure.ungrounded_graph_forest) - 1:
                    continue  # only the final ungrounded graph is scored
                grounded_graph_list = ungrounded_graph.get_grounded_graph_forest()
                try:
                    bert_scores = path_match_score12.set_bert_score_score12(
                        question_normal=question,
                        grounded_graph_forest_list=grounded_graph_list)
                    for grounded_graph, bert_score in zip(grounded_graph_list, bert_scores):
                        grounded_graph.score = bert_score
                except Exception as e:
                    for grounded_graph in grounded_graph_list:
                        grounded_graph.score = 0.0
                    # Fix: report which structure failed and why, instead of a
                    # bare 'error' that hid the cause.
                    print('error:', structure.qid, e)
        write_structure_file(structure_list, structure_with_grounded_graphq_file)
    print('over')
def run_grounded_node_grounding_dbpedia(structure_with_ungrounded_graphq_file, output_file,
                                        linking_is_gold=False):
    """2.1 step: 1.0 ungrounded query -> 2.1 grounded query (DBpedia linking).

    Only the final ungrounded graph of each structure is processed. With
    ``linking_is_gold=True`` each node mention is resolved via the gold
    LC-QuAD topic-entity lookup; otherwise the DBpedia node-linking model is
    run. The updated structures are written to *output_file*.
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_dbpedia
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        for i, ungrounded_graph in enumerate(forest):
            if i != len(forest) - 1:
                continue
            linking_results = []
            for node in ungrounded_graph.nodes:
                if linking_is_gold:
                    linked = lcquad_1_0_interface.get_topic_entities_by_question_and_mention(
                        question=structure.question, mention=node.friendly_name)
                else:
                    linked = node_linking_interface_dbpedia.node_linking(node=node)
                linking_results.append((node, linked))
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=linking_results)
            ungrounded_graph.set_grounded_linking(linking_results)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structure_list, output_file)
def run_grounding_graph_cnn_match(input_file_folder):
    """Score each candidate's key path against the question with the CNN matcher.

    The question is reduced to its important words for the current ungrounded
    graph, every candidate path is scored in place, and each structure file is
    rewritten.
    """
    import os
    from grounding.ranking.path_match_cnn.cnn_match_interface import CNNMatchInterface
    from parsing import parsing_utils
    matcher = CNNMatchInterface()
    for filename in os.listdir(input_file_folder):
        print(filename)
        file_path = input_file_folder + filename
        structures = read_structure_file(file_path)
        for structure in structures:
            question = structure.question
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # Deliberately rebinds `question` to the extracted words,
                # mirroring the original control flow.
                question = parsing_utils.extract_importantwords_from_cnn(
                    question, ungrounded_graph)
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    candidate.score = matcher.get_path_pro(candidate.key_path, question)
                    print(candidate.key_path, question, candidate.score)
        write_structure_file(structures, file_path)
def run_grounding_graph_update_denotation_graphq(input_file_folder):
    '''Post-process denotations of aggregation questions (superlative / comparative).

    For each structure file in input_file_folder:
      - 'bgp' / 'count' questions are left untouched;
      - 'superlative' questions reduce the denotation to the argmax/argmin
        element (float or datetime comparison, via operation_utils);
      - 'comparative' questions filter the denotation against the
        normalization_value found on an ungrounded node.
    Files are rewritten only when at least one aggregation question was seen.

    BUG FIX: ``time.clock()`` was removed in Python 3.8; timings now use
    ``time.perf_counter()``, which provides the same monotonic timing role.
    On any exception the code falls back to flattening the raw denotation
    (deliberate best-effort, preserved from the original).
    '''
    for path in os.listdir(input_file_folder):
        structure_with_grounded_graphq_file = input_file_folder + path
        print(path)
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        is_aggregation = False
        for structure in structure_list:
            qtype = structure.compositionality_type
            assert qtype in ["bgp", "count", "superlative", "comparative"]
            if qtype in ['bgp', 'count']:
                continue
            is_aggregation = True
            for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
                # only the last ungrounded graph of the forest is processed
                if j != len(structure.ungrounded_graph_forest) - 1:
                    continue
                grounded_graph_list = ungrounded_graph.get_grounded_graph_forest()
                if qtype in ['superlative']:
                    superlative_q_type = structure.function
                    for grounded_graph in grounded_graph_list:
                        print(grounded_graph.grounded_query_id)
                        # denotation rows look like [answer, value]; the value
                        # decides whether to compare as float or as datetime
                        sample_value = grounded_graph.denotation[0][1]
                        denotation_length = len(grounded_graph.denotation)
                        new_denotation = set()
                        try:
                            if denotation_length > 1 and operation_utils.is_number(sample_value):
                                start = time.perf_counter()
                                new_denotation.add(
                                    operation_utils.get_denotation_with_superlativeconstraint_float(
                                        denotation_with_value_list=grounded_graph.denotation,
                                        superlative_type=superlative_q_type))
                                end = time.perf_counter()
                                print('get_denotation_with_superlativeconstraint_float:\t', end - start)
                            elif denotation_length > 1 and operation_utils.isVaildDate(sample_value):
                                start = time.perf_counter()
                                new_denotation.add(
                                    operation_utils.get_denotation_with_superlativeconstraint_datetime(
                                        denotation_with_value_list=grounded_graph.denotation,
                                        superlative_type=superlative_q_type))
                                end = time.perf_counter()
                                print('get_denotation_with_superlativeconstraint_datetime:\t', end - start)
                            else:
                                # single answer or non-comparable values: just flatten
                                start = time.perf_counter()
                                for answer in grounded_graph.denotation:
                                    if type(answer) == list:
                                        new_denotation.add(answer[0])
                                    else:
                                        new_denotation.add(answer)
                                end = time.perf_counter()
                                print('grounded_graph.denotation1:\t', end - start)
                        except Exception:
                            # best-effort fallback: flatten the raw denotation
                            start = time.perf_counter()
                            for answer in grounded_graph.denotation:
                                if type(answer) == list:
                                    new_denotation.add(answer[0])
                                else:
                                    new_denotation.add(answer)
                            end = time.perf_counter()
                            print('grounded_graph.denotation2:\t', end - start)
                        grounded_graph.denotation = list(new_denotation)
                elif qtype in ['comparative']:
                    q_function = structure.function
                    # the comparison constant is stored on one of the ungrounded nodes
                    normalization_value = None
                    for ungrounded_node in ungrounded_graph.nodes:
                        if ungrounded_node.normalization_value is not None:
                            normalization_value = ungrounded_node.normalization_value
                            break
                    for grounded_graph in grounded_graph_list:
                        denotation_length = len(grounded_graph.denotation)
                        try:
                            if denotation_length > 1 and (
                                    '^^xsd:dateTime' in normalization_value
                                    or '^^http://www.w3.org/2001/XMLSchema#datetime' in normalization_value):
                                new_denotation = operation_utils.filter_by_datetime_compare(
                                    denotation=grounded_graph.denotation,
                                    compare_element=normalization_value.split('^^')[0],
                                    q_function=q_function)
                            elif denotation_length > 1:
                                # composition[['1768.0^^http://www.w3.org/2001/XMLSchema#double', 'literal']]
                                # composition[['1^^http://www.w3.org/2001/XMLSchema#int', 'literal']]
                                if '^^' in normalization_value:
                                    normalization_value = normalization_value.split('^^')[0]
                                new_denotation = operation_utils.filter_by_float_compare(
                                    denotation=grounded_graph.denotation,
                                    compare_element=normalization_value,
                                    q_function=q_function)
                            else:
                                all_denotation = set()
                                for answer in grounded_graph.denotation:
                                    if type(answer) == list:
                                        all_denotation.add(answer[0])
                                    else:
                                        all_denotation.add(answer)
                                new_denotation = list(all_denotation)
                            grounded_graph.denotation = new_denotation
                        except Exception:
                            # best-effort fallback: flatten the raw denotation
                            all_denotation = set()
                            for answer in grounded_graph.denotation:
                                if type(answer) == list:
                                    all_denotation.add(answer[0])
                                else:
                                    all_denotation.add(answer)
                            print('error', grounded_graph.grounded_query_id, grounded_graph.denotation)
                            grounded_graph.denotation = list(all_denotation)
        if is_aggregation:
            write_structure_file(structure_list, structure_with_grounded_graphq_file)
        print('over')
def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file, output_file, linking_is_gold=False, q_mode='graphq'):
    '''
    function: 1.0 ungrounded query -> 2.1 grounded query (Freebase)
    input: structure_ungrounded_graphq_file
    linking_is_gold: when True, take gold topic entities from the dataset
                     interface instead of running the entity linker
    q_mode: 'graphq' or 'cwq'; selects the entity-linking backend
    :return: grounded graph with entity linking (written to output_file)
    '''
    from common import globals_args
    from datasets_interface.question_interface import graphquestion_interface
    from datasets_interface.question_interface import complexwebquestion_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_freebase
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface

    assert q_mode in ['graphq', 'cwq']
    if q_mode == 'cwq':
        # aqqu entity linking
        from method_sp.grounding._2_1_grounded_graph.node_linking.entity_linking_aqqu_vocab.surface_index_memory import EntitySurfaceIndexMemory
        kb_args = globals_args.kb_freebase_latest_file
        elp = EntitySurfaceIndexMemory(
            entity_list_file=kb_args.entity_list_file,
            surface_map_file=kb_args.surface_map_file,
            entity_index_prefix=kb_args.entity_index_prefix)
    elif q_mode == 'graphq':
        from method_sp.grounding._2_1_grounded_graph.node_linking.entity_linking_en_vocab.entity_link_pipeline import EntityLinkPipeline
        kb_args = globals_args.kb_freebase_en_2013
        elp = EntityLinkPipeline(
            freebase_graph_name_entity_file=kb_args.freebase_graph_name_entity,
            freebase_graph_alias_entity_file=kb_args.freebase_graph_alias_entity,
            graphquestions_train_friendlyname_entity_file=kb_args.graphquestions_train_friendlyname_entity,
            clueweb_mention_pro_entity_file=kb_args.clueweb_mention_pro_entity)

    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_index = len(forest) - 1
        for idx, ungrounded_graph in enumerate(forest):
            # only the last ungrounded graph of the forest gets grounded
            if idx != last_index:
                continue
            linking_results = []
            for node in ungrounded_graph.nodes:
                if linking_is_gold:
                    assert q_mode in ['graphq', 'cwq']
                    gold_dict = dict()
                    linkable = node.node_type in ['entity', 'class', 'literal']
                    if q_mode == 'graphq' and linkable:
                        gold_dict = graphquestion_interface.get_topic_entities_by_question_and_mention(
                            question=structure.question, mention=node.friendly_name)
                    elif q_mode == 'cwq' and linkable:
                        gold_dict = complexwebquestion_interface.get_topic_entities_by_question_and_mention(
                            question_normal=structure.question, mention=node.friendly_name)
                    linking_results.append((node, gold_dict))
                else:
                    linking_results.append(
                        (node, node_linking_interface_freebase.node_linking(node=node, elp=elp)))
            candidate_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=linking_results)
            ungrounded_graph.set_grounded_linking(linking_results)
            ungrounded_graph.set_grounded_graph_forest(candidate_graphs)
    write_structure_file(structure_list, output_file)