def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file, output_file):
    '''
    #2.1 function: 1.0 ungrounded query -> 2.1 grounded query
    input: structure_ungrounded_graphq_file
    :return: grounded graph with entity linking (Freebase node linking)
    '''
    from grounding._2_1_grounded_graph import node_linking_interface_freebase
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        for i, ungrounded_graph in enumerate(structure.get_ungrounded_graph_forest()):
            # only the last ungrounded graph of the forest is linked and grounded
            if i == len(structure.get_ungrounded_graph_forest()) - 1:
                grounding_result_list = []
                for node in ungrounded_graph.nodes:
                    # pair each node with its Freebase entity-linking result
                    grounding_result_list.append(
                        (node, node_linking_interface_freebase.node_linking(qid=structure.qid, node=node)))
                grouned_graph_list = generate_grounded_graph_interface(
                    ungrounded_graph=ungrounded_graph, grounding_result_list=grounding_result_list)
                ungrounded_graph.set_grounded_linking(grounding_result_list)
                ungrounded_graph.set_grounded_graph_forest(grouned_graph_list)
    write_structure_file(structure_list, output_file)
def compute_all_questions_recall(input_file):
    '''
    # oracle all recall by max f1
    :param input_file: folder of per-question structure files
    :return: None; prints per-question imperfect maxima, the error list,
             and finally the summed max-F1 (oracle recall) with the file count
    '''
    all_data_path = os.listdir(input_file)
    all_recall = 0
    error_list = []  # files whose processing raised and were skipped
    for path in all_data_path:
        try:
            structure_list = read_structure_file(input_file + path)
            max_f1 = 0
            question = None
            for structure in structure_list:
                question = structure.question
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                        if max_f1 < grounded_graph.f1_score:
                            max_f1 = grounded_graph.f1_score
            all_recall += max_f1
            # report questions whose best candidate is not a perfect match
            if max_f1 != 1.0:
                print(('%s\t%s\t%s') % (path, question, str(max_f1)))
        except Exception as e:
            print(e)
            error_list.append(path)
    print('#error_list:\t', error_list)
    print(all_recall, len(all_data_path))
def generate_paths_graphq_interface_from_graph_2_1(structure_with_2_1_grounded_graph_file):
    """Print (qid, question-type, entities) for every 2.1 grounded graph.

    Graphs with one entity are labelled 'composition', with two 'conjunction';
    qids whose entity conversion raises are collected and reported at the end.
    """
    failed_qids = []
    for structure in read_structure_file(structure_with_2_1_grounded_graph_file):
        for ungrounded_graph in structure.ungrounded_graph_forest:
            for grounded_21 in ungrounded_graph.get_grounded_graph_forest():
                try:
                    qid_entities = grounding_utils.convert_2_1_graph_to_qid_entities(_2_1_graph=grounded_21)
                except Exception:
                    failed_qids.append(structure.qid)
                    continue
                # one entity -> composition question, two -> conjunction
                label = {1: 'composition', 2: 'conjunction'}.get(len(qid_entities))
                if label is not None:
                    print(('%s\t%s\t%s') % (structure.qid, label, str(qid_entities)))
    print('#error:\t', failed_qids)
def run_grounding_graph_question_match_minus(input_file_folder):
    '''path candidate grounding graph: subtract each candidate's normalized
    word-level score from its total_score, rewriting every file in place'''
    import os
    from common import utils
    for path in os.listdir(input_file_folder):
        print(path)
        structure_with_grounded_graphq_file = input_file_folder + path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        # first pass: collect every candidate score so they can be normalized file-wide
        all_score = []
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    all_score.append(grounded_graph.score)
        all_score_guiyi = utils.Normalize(all_score)  # guiyi = normalization
        # raw score -> normalized score (duplicate raw scores map to the same value)
        score_guiyi = dict()
        for i, score_ori in enumerate(all_score):
            score_guiyi[score_ori] = all_score_guiyi[i]
        # second pass: apply total_score - normalized(score)
        for structure in structure_list:
            # qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # 4. run total - score_guiyi
                    # grounded_graph.total_score = score_guiyi[grounded_graph.score] + qmi.get_score(qid, grounded_graph.denotation)
                    grounded_graph.total_score = grounded_graph.total_score - score_guiyi[grounded_graph.score]
        # return
        write_structure_file(structure_list, structure_with_grounded_graphq_file)
def run_grounded_node_grounding_dbpedia_gold(structure_with_ungrounded_graphq_file, output_file):
    '''
    #2.1 function: 1.0 ungrounded query -> 2.1 grounded query
    input: structure_ungrounded_graphq_file
    :return: grounded graph with entity linking (gold topic entities from LC-QuAD 1.0)
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        for i, ungrounded_graph in enumerate(structure.get_ungrounded_graph_forest()):
            # only the last ungrounded graph of the forest is linked and grounded
            if i == len(structure.get_ungrounded_graph_forest()) - 1:
                grounding_result_list = []
                for node in ungrounded_graph.nodes:
                    # (node(barbaro), {'en.barbaro': 1.6}), get_el_result(question=structure.question, nid=node.nid)
                    grounding_result_list.append(
                        (node, lcquad_1_0_interface.get_topic_entities_list_by_question_and_nodemention(
                            question=structure.question, mention=node.friendly_name)))
                grouned_graph_list = generate_grounded_graph_interface(
                    ungrounded_graph=ungrounded_graph, grounding_result_list=grounding_result_list)
                ungrounded_graph.set_grounded_linking(grounding_result_list)
                ungrounded_graph.set_grounded_graph_forest(grouned_graph_list)
    write_structure_file(structure_list, output_file)
def computed_every_grounded_graph_f1_cwq(input_file):
    # Score every grounded graph of every ComplexWebQuestions structure with
    # recall / precision / F1 against the gold answers from the dataset
    # interface, rewriting each structure file in place.
    from datasets_interface.question_interface import complexwebquestion_interface
    all_structure_path = os.listdir(input_file)
    error_list = []  # files whose processing raised and were skipped
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        print(structure_path)
        try:
            structure_list = read_structure_file(structure_with_grounded_graphq_file)
            for structure in structure_list:
                gold_answer_mid_set = complexwebquestion_interface.get_answers_by_question(structure.question)
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                        system_denotation_set = set(grounded_graph.denotation)
                        recall, precision, f1 = sempre_evaluation.computeF1(gold_answer_mid_set, system_denotation_set)
                        grounded_graph.f1_score = f1
                        grounded_graph.recall_score = recall
                        grounded_graph.precision_score = precision
            write_structure_file(structure_list, input_file + structure_path)
        except Exception as e:
            print('error')
            error_list.append(structure_path)
    print('error_list:\t', error_list)
def train_data_generation_samestructure_graphq(propertys, files, qid_abstractquestions):
    """Build training records (gold path + same-structure negative paths) for GraphQuestions.

    For each structure file, collect up to ``model_parameters.neg_size`` negative
    paths whose length matches the gold path's; if the candidate graphs do not
    supply enough, pad with random property combinations. Writes the result to
    ``data_for_trainorval_list_samestructure.json``.

    :param propertys: list of KB relation names used to sample random negatives
    :param files: structure files to read (one question per file)
    :param qid_abstractquestions: dict qid -> abstract question string
    """
    data_for_train_list = list()
    for i, file in enumerate(files):
        print(i, file)
        data = read_structure_file(file)
        qid = file.split('/')[-1].split('.')[0]
        if len(qid_abstractquestions[qid]) == 0:
            continue  # no abstract question available for this qid
        negatives = list()
        j = 0  # number of negatives collected so far
        for structure in data:
            gold_path = []
            predicates = []
            # for edge in structure.gold_graph_query.edges:
            #     gold_path.append(edge.relation)
            #     predicates.append(edge.relation)
            for edge in structure.gold_sparql_query['edges']:
                gold_path.append(edge['relation'])
                predicates.append(edge['relation'])
            gold_path.sort()
            gold_path = '\t'.join(gold_path)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.grounded_graph_forest:
                    # canonicalize the candidate path (sorted relations) before comparing
                    ps = grounded_graph.key_path.split('\t')
                    ps.sort()
                    path = '\t'.join(ps)
                    if j < model_parameters.neg_size and len(ps) == len(predicates) and path != gold_path:
                        negatives.append(path)
                        j += 1
            if j > 0:
                # pad with random property combinations up to neg_size
                # (bug fix: the padding loop previously used `i` as its loop
                # variable, clobbering the enumerate index of the outer file loop)
                while j < model_parameters.neg_size:
                    candidate = list()
                    for _ in range(len(predicates)):
                        candidate.append(propertys[random.randint(0, len(propertys) - 1)])
                    candidate.sort()
                    candidate = "\t".join(candidate)
                    if candidate != gold_path and candidate not in negatives:
                        negatives.append(candidate)
                        j += 1
                one = dict()
                one["qid"] = qid
                one["abstractquestion"] = (qid_abstractquestions[qid])
                one["gold_path"] = gold_path
                one["negatives"] = negatives
                data_for_train_list.append(one)
            else:
                print('not join', qid)
    write_json(data_for_train_list,
               fn_graph_file.path_match_dir + "data_for_trainorval_list_samestructure.json")
def computed_every_grounded_graph_f1_graphq(input_file):
    # Score every grounded graph of every GraphQuestions structure with
    # recall / precision / F1 against gold answer mids from the dataset
    # interface; also overwrite structure.gold_answer with the mid list,
    # then rewrite each structure file in place.
    from datasets_interface.question_interface import graphquestion_interface
    for structure_path in os.listdir(input_file):
        structure_with_grounded_graphq_file = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            gold_answers_mid_set = graphquestion_interface.get_answers_mid_by_question(structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # stringify integer denotations so they compare with gold mids
                    new_system_answers_list = []
                    for system_answer in set(grounded_graph.denotation):
                        if isinstance(system_answer, int):
                            new_system_answers_list.append(str(system_answer))
                        else:
                            new_system_answers_list.append(system_answer)
                    recall, precision, f1 = sempre_evaluation.computeF1(gold_answers_mid_set, new_system_answers_list)
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
                    if f1 > 0:
                        print(structure_path, f1)  # print(structure_path, gold_answers_mid_set, new_system_answers_list, f1)
            structure.gold_answer = gold_answers_mid_set  # update answers by answer mid list ["Kimberly-Clark"] ['en.kimberly-clark']
        write_structure_file(structure_list, input_file + structure_path)
def run_grounding_graph_path_match(input_file_folder):
    '''path candidate grounding graph: score each candidate's key path against
    the question's important words with a lexical NN path matcher, writing the
    score into grounded_graph.score and rewriting each file in place'''
    from grounding.ranking.path_match_nn.path_match_interface import PathMatchByLexicalNN
    import os
    from parsing.parsing_utils import extract_importantwords_from_question
    all_data_path = os.listdir(input_file_folder)
    pml = PathMatchByLexicalNN()
    for path in all_data_path:
        print(path)
        structure_with_grounded_graphq_file = input_file_folder + path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            question = structure.question
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # important words are extracted per ungrounded graph (entity spans removed)
                importantwords_list = extract_importantwords_from_question(
                    question=question, ungrounded_graph=ungrounded_graph)
                print(importantwords_list, len(ungrounded_graph.get_grounded_graph_forest()))
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    grounded_graph.score = pml.get_path_pro(
                        grounded_graph.key_path, importantwords_list)  # '\t'.join(grounded_graph.key_path),
                    print(grounded_graph.key_path, importantwords_list, grounded_graph.score)
        write_structure_file(structure_list, structure_with_grounded_graphq_file)
def run_grounded_graph_generation_by_structure_transformation(structure_with_grounded_graphq_node_grounding_file, output_file):
    # 2.2 step: expand each 2.1 grounded graph into candidate 2.2 grounded
    # graphs by structure transformation, attach SPARQL (CWQ dialect) and,
    # for count questions, a count denotation; writes one file per qid.
    from grounding._2_2_grounded_graph_offline import graph_2_1_to_2_2_by_transfer
    from grounding.grounded_graph_to_sparql import grounded_graph_to_sparql_CWQ

    def count_denotation_to_num(grounded_graph):
        '''
        # counting
        # how many softwares are developed by google?
        '''
        # returns the answer-set size wrapped in a single-element list
        num = 0
        denotation_set = grounded_graph.denotation
        if denotation_set is not None:
            num = len(denotation_set)
        return [num]

    structure_list = read_structure_file(structure_with_grounded_graphq_node_grounding_file)
    new_structure_list = []
    error_qid_list = []  # qids whose candidate generation raised
    for i, structure in enumerate(structure_list):
        # skip questions already present in the output folder (resume support)
        if str(structure.qid) + '.json' in os.listdir(output_file):
            continue
        new_structure_list.clear()
        print(i, structure.qid, structure.question)
        is_print = False
        for ungrounded_graph in structure.ungrounded_graph_forest:
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    grounded_graph_forest.extend(
                        graph_2_1_to_2_2_by_transfer.generate_candidates_by_2_1_grounded_graph_interface(
                            _2_1_grounded_graph=_2_1_grounded_graph))
                except Exception as e:
                    print('#Error:', structure.qid, e)
                    error_qid_list.append(structure.qid)
                # break
            if len(grounded_graph_forest) > 0:
                is_print = True
                print('#Size:', len(grounded_graph_forest))
            for z in range(len(grounded_graph_forest)):
                # derive a unique candidate id from the ungrounded query id
                grounded_graph_forest[z].grounded_query_id = ungrounded_graph.ungrounded_query_id * 100000 + z
                grounded_graph_forest[z].sparql_query = grounded_graph_to_sparql_CWQ(grounded_graph_forest[z])
                if structure.function == 'count':
                    # count questions denote the size of the answer set, as [num]
                    grounded_graph_forest[z].denotation = count_denotation_to_num(grounded_graph_forest[z])
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
        if is_print:
            new_structure_list.append(structure)
            write_structure_file(new_structure_list, output_file + str(structure.qid) + '.json')
    print('Error qid list:', error_qid_list)
def generate_paths_graphq_interface_from_graph_2_1_graphq(structure_with_2_1_grounded_graph_file):
    # For every 2.1 grounded graph, check whether its oracle path file already
    # exists on disk and print the (qid, type, entities) triples that are missing.

    def is_exist(question_type=None, entities_or_literals=None):
        # Return 1 if an oracle path file name exists for this entity combination
        # (trying both entity orders for two-entity conjunctions), else 0.
        from grounding import grounding_args
        blag = 0
        filename_1 = question_type
        filename_2 = None
        if len(entities_or_literals) == 1:
            filename_1 += '_' + entities_or_literals[0][1] + '_' + entities_or_literals[0][0]
        elif len(entities_or_literals) == 2:
            filename_1 += '_' + entities_or_literals[0][1] + '_' + entities_or_literals[0][0]
            filename_1 += '_' + entities_or_literals[1][1] + '_' + entities_or_literals[1][0]
            # conjunction files may have been stored with the two entities swapped
            filename_2 = question_type
            filename_2 += '_' + entities_or_literals[1][1] + '_' + entities_or_literals[1][0]
            filename_2 += '_' + entities_or_literals[0][1] + '_' + entities_or_literals[0][0]
        if filename_1 in grounding_args.oracle_all_files_path_names:
            blag = 1
        elif filename_2 is not None and filename_2 in grounding_args.oracle_all_files_path_names:
            blag = 1
        return blag

    structure_list = read_structure_file(structure_with_2_1_grounded_graph_file)
    error_qid_list = []
    # structure_list = structure_list[0:1000]
    for i, structure in enumerate(structure_list):
        for ungrounded_graph in structure.ungrounded_graph_forest:
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                # try:
                entities_list = grounding_utils.convert_2_1_graph_to_qid_entities(_2_1_graph=_2_1_grounded_graph)
                if len(entities_list) == 1:
                    blag = is_exist(question_type='composition', entities_or_literals=entities_list)
                    if blag == 0:
                        print(('%s\t%s\t%s\t%d') % (structure.qid, 'composition', str(entities_list), blag))
                elif len(entities_list) == 2:
                    blag = is_exist(question_type='conjunction', entities_or_literals=entities_list)
                    if blag == 0:
                        print(('%s\t%s\t%s\t%d') % (structure.qid, 'conjunction', str(entities_list), blag))
                # except Exception as e:
                #     error_qid_list.append(structure.qid)
    print('#error:\t', error_qid_list)
def computed_every_grounded_graph_f1_webq_name(input_file, answer_file, mid_to_names_file):
    # Score grounded graphs for WebQuestions by comparing answer NAMES:
    # denotation mids are first mapped to names, then F1 is computed
    # against the gold answer names; files are rewritten in place.
    # from datasets_interface.freebase import webquestions_interface
    # from evaluation.webq_denotation import webq_mid_to_names_process
    #------------------------------------------------
    #read qid-to-answers
    qid_to_answers_dict = dict()
    lines = read_list(answer_file)
    for line in lines:
        cols = line.split('\t')
        # NOTE(review): eval on file content — trusted local file assumed
        qid_to_answers_dict[cols[0]] = eval(cols[2])
    #------------------------------------------------
    # mid to names dict
    mid_to_names_dict = dict()
    lines = read_list(mid_to_names_file)
    for line in lines:
        cols = line.split('\t')
        mid = cols[1]
        names = list(eval(cols[2]))
        mid_to_names_dict[mid] = names
    #------------------------------------------------
    all_structure_path = os.listdir(input_file)
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_names_set = evaluation_utils.search_for_answers_by_id(qid, qid_to_answers_dict)
            print(structure_path, '#gold:\t', gold_answer_names_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_names_set = set()
                    for denotation_mid in grounded_graph.denotation:
                        denotation_name = evaluation_utils.get_name_by_mid(denotation_mid, mid_to_names_dict)
                        print('###denotation:\t', denotation_mid, denotation_name)
                        if denotation_name is not None:
                            system_denotation_names_set.add(denotation_name)
                        else:
                            # a mid without a known name cannot be matched against gold names
                            print(denotation_mid, '#####error!!!', denotation_name)
                    print('#gold:\t', gold_answer_names_set, '#system:\t', system_denotation_names_set)
                    recall, precision, f1 = sempre_evaluation.computeF1(gold_answer_names_set, system_denotation_names_set)
                    if f1 > 0.0:
                        print('#result:\t', f1)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, input_file + structure_path)
def generate_qid_abstractquestion():
    # Build a qid -> abstract question mapping (entity mentions replaced by
    # '<e>') from the train/test 2.1 structure files, save it as JSON, and
    # return it. Keys are prefixed 'train_' / 'test_'.
    # dev_2_1 = read_structure_file(dev_structure_with_2_1_grounded_graph_file)
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    train_2_1 = read_structure_file(train_structure_with_2_1_grounded_graph_file)
    qid_abstractquestion = dict()
    any_2_1_dict = {'train': train_2_1, 'test': test_2_1}  #'dev': dev_2_1
    for key in any_2_1_dict:
        any_2_1 = any_2_1_dict[key]
        for one in any_2_1:
            qid = key + "_" + str(one.qid)
            question = one.question
            for ungrounded_graph in one.ungrounded_graph_forest:
                question_ = question
                for node in ungrounded_graph.nodes:
                    if node.node_type == 'entity':
                        # replace the entity mention with a placeholder token
                        question_ = question_.replace(node.friendly_name, '<e>')
                qid_abstractquestion[qid] = question_
                break  # only the first ungrounded graph is used
    # print(len(qid_abstractquestions))
    write_json(qid_abstractquestion, data_question_match + 'qid_abstractquestion.json')
    return qid_abstractquestion
def computed_every_grounded_graph_f1_graphq(input_file):
    # GraphQuestions F1 scoring variant that reads gold answers from
    # precomputed qid -> answer-mid dicts (test first, then train) instead
    # of the dataset interface; rewrites each structure file in place.
    from grounding.grounding_args import test_qid_to_answers_mid_dict, train_qid_to_answers_mid_dict
    for structure_path in os.listdir(input_file):  #all_structure_path
        structure_with_grounded_graphq_file = input_file + structure_path
        print(structure_path)
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            gold_answers_mid_set = []
            qid = structure.qid
            if qid in test_qid_to_answers_mid_dict:
                gold_answers_mid_set = test_qid_to_answers_mid_dict[qid]
            elif qid in train_qid_to_answers_mid_dict:
                gold_answers_mid_set = train_qid_to_answers_mid_dict[qid]
            #[80] -> ['80']  stringify integer gold answers before comparison
            new_gold_answers_set = set()
            for gold_answer in gold_answers_mid_set:
                if isinstance(gold_answer, int):
                    new_gold_answers_set.add(str(gold_answer))
                else:
                    new_gold_answers_set.add(gold_answer)
            gold_answers_mid_set = list(new_gold_answers_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_set = set(grounded_graph.denotation)
                    # stringify integer system answers as well
                    new_system_answers_set = set()
                    for system_answer in system_denotation_set:
                        if isinstance(system_answer, int):
                            new_system_answers_set.add(str(system_answer))
                        else:
                            new_system_answers_set.add(system_answer)
                    new_system_answers_set = list(new_system_answers_set)
                    recall, precision, f1 = sempre_evaluation.computeF1(gold_answers_mid_set, new_system_answers_set)
                    print(structure_path, gold_answers_mid_set, new_system_answers_set, f1)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
            # update answers by answer mid list ["Kimberly-Clark"] ['en.kimberly-clark']
            structure.gold_answer = gold_answers_mid_set
        write_structure_file(structure_list, input_file + structure_path)
def grounded_graphes_by_score_standard_ywsun_prediction_test(input_file):
    # For each question file, pick the top-ranked grounded query (highest
    # total_score) and emit its denotation as the predicted answers; writes
    # a prediction JSON of {'ID': qid, 'answers_id': [...]} records.
    from common.hand_files import write_json
    all_structure_path = os.listdir(input_file)
    # all_f1_score = 0
    prediction_list = []
    for structure_path in all_structure_path:
        print(structure_path)
        structure_list = read_structure_file(input_file + structure_path)
        score_to_queryid_sparql = collections.defaultdict(list)
        # grounded_query_id_to_f1_denotation = collections.defaultdict(set)
        grounded_query_id_to_denotation = collections.defaultdict(set)
        qid = None
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # ungrounded_graph_edges_num = len(ungrounded_graph.edges)
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # grounded_graph_edges_num = len(grounded_graph.edges)
                    # edge constaints
                    # if grounded_graph_edges_num != ungrounded_graph_edges_num: continue
                    # score_to_queryid_sparql[grounded_graph.score].append(grounded_graph.grounded_query_id) #word level matcher
                    score_to_queryid_sparql[grounded_graph.total_score].append(grounded_graph.grounded_query_id)
                    # grounded_query_id_to_f1_denotation[grounded_graph.grounded_query_id] = grounded_graph.f1_score
                    grounded_query_id_to_denotation[grounded_graph.grounded_query_id] = grounded_graph.denotation
        answers = []
        # sort candidates by total_score high-to-low, then take the first
        # query id's denotation (double break = top-1 selection)
        score_to_queryid_sparql = dict(sorted(score_to_queryid_sparql.items(), key=lambda d: d[0], reverse=True))
        for totalscore, grounded_query_ids in score_to_queryid_sparql.items():
            for grounded_query_id in grounded_query_ids:
                answers = grounded_query_id_to_denotation[grounded_query_id]
                # all_f1_score += f1_score
                # top1id = grounded_query_id
                break
            break
        q_dict = dict()
        q_dict['ID'] = qid
        q_dict['answers_id'] = answers
        prediction_list.append(q_dict)
    write_json(prediction_list, './20191113_cwq_wo_wordlevel_prediction_test.json')
def generate_cwq_train_candidates_paths_from_structure(cwq_gold_path_list, train_candidates_sp_path_top_path, output_file):
    # Attach per-hop candidate paths (hop1..hop4) from the candidate structure
    # files to each CWQ gold record, resolve the gold path by matching it
    # against the record's reverse_paths_list, and write the enriched records.
    files = os.listdir(train_candidates_sp_path_top_path)
    new_cwq_path_list = []
    for one in cwq_gold_path_list:
        print(one['qid'])
        if str(one['qid']) + '.json' not in files:
            continue  # no candidate file was generated for this question
        if 'path' not in one['gold']:
            continue  # record has no gold path to resolve
        new_one = dict()
        new_one['qid'] = one['qid']
        new_one['question_normal'] = one['question_normal']
        new_one['gold'] = one['gold']
        test_candidates_sp = read_structure_file(train_candidates_sp_path_top_path + str(one['qid']) + '.json')
        test_candidates_sp = test_candidates_sp[0]
        # only the last ungrounded graph carries the candidate forest
        ungrounded_graph = test_candidates_sp.ungrounded_graph_forest[-1]
        hop1, hop2, hop3, hop4 = score12_utils.grounded_graph_list_to_path_list(
            ungrounded_graph.get_grounded_graph_forest())
        hops = []
        if len(hop1) > 0:
            new_one['gold']['hop1'] = hop1
            hops += hop1
        if len(hop2) > 0:
            new_one['gold']['hop2'] = hop2
            hops += hop2
        if len(hop3) > 0:
            new_one['gold']['hop3'] = hop3
            hops += hop3
        if len(hop4) > 0:
            new_one['gold']['hop4'] = hop4
            hops += hop4
        # find a candidate hop equal (order-insensitive) to a reverse gold path
        goldpath = None
        for hop in hops:
            for i, temp_goldpath in enumerate(new_one['gold']['reverse_paths_list']):
                if score12_utils.eq_paths(temp_goldpath, hop):
                    goldpath = temp_goldpath
                    break
        if goldpath is not None:
            new_one['gold']['path'] = goldpath
        del new_one['gold']['reverse_paths_list']
        new_cwq_path_list.append(new_one)
    write_json(new_cwq_path_list, fn_cwq_file.score12_match + output_file)
def show_f1_given_qids(input_file, qids):
    """Record the best candidate F1 for each requested qid and dump the map to 'qid_f1.json'."""
    best_f1_by_qid = dict()
    for filename in os.listdir(input_file):
        stem = filename.split('.')[0]
        if stem not in qids:
            continue
        structures = read_structure_file(input_file + filename)
        print(filename)
        best = 0
        for structure in structures:
            for ungrounded in structure.ungrounded_graph_forest:
                for candidate in ungrounded.get_grounded_graph_forest():
                    if candidate.f1_score > best:
                        best = candidate.f1_score
        best_f1_by_qid[stem] = best
    write_json(best_f1_by_qid, 'qid_f1.json')
def run_grounding_graph_add_question_match(input_file_folder):
    '''path candidate grounding graph'''
    from grounding.ranking.path_match_sentence_level.question_match_interface import QuestionMatchInterface
    matcher = QuestionMatchInterface()
    for filename in os.listdir(input_file_folder):
        print(filename)
        file_path = input_file_folder + filename
        structures = read_structure_file(file_path)
        for structure in structures:
            question_id = structure.qid
            for ungrounded in structure.ungrounded_graph_forest:
                for candidate in ungrounded.get_grounded_graph_forest():
                    # total = word-level score + sentence-level question-match score
                    candidate.total_score = candidate.score + matcher.get_score(question_id, candidate.denotation)
        write_structure_file(structures, file_path)
def run_grounding_graph_guiyi_add_question_match(input_file_folder):
    '''path candidate grounding graph: normalize (guiyi) the word-level scores,
    then (in the current configuration) overwrite each candidate's score with
    the question-match score; alternative configurations are kept commented out'''
    import os
    from common import utils
    from grounding.ranking.path_match_nn.question_match_interface import QuestionMatchInterface
    qmi = QuestionMatchInterface()
    for path in os.listdir(input_file_folder):
        print(path)
        structure_with_grounded_graphq_file = input_file_folder + path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        # collect all candidate scores of this file for normalization
        all_score = []
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    all_score.append(grounded_graph.score)
        all_score_guiyi = utils.Normalize(all_score)
        # raw score -> normalized score lookup
        score_guiyi = dict()
        for i, score_ori in enumerate(all_score):
            score_guiyi[score_ori] = all_score_guiyi[i]
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    # 3. run question match alone (into total_score)
                    # grounded_graph.total_score = qmi.get_score(qid, grounded_graph.denotation)
                    # if grounded_graph.total_score > 0:
                    #     print ('\t\t', grounded_graph.total_score)
                    # 4. run question match alone (into score; current configuration)
                    grounded_graph.score = qmi.get_score(qid, grounded_graph.denotation)
                    if grounded_graph.score > 0:
                        print('\t\t', grounded_graph.score)
                    # 4. run word match + question match
                    # grounded_graph.total_score = score_guiyi[grounded_graph.score] + qmi.get_score(qid, grounded_graph.denotation)
        # return
        write_structure_file(structure_list, structure_with_grounded_graphq_file)
def run_candidate_graph_generation(structure_with_1_ungrounded_lcquad_file, output_file, q_mode='lcquad'):
    # 2.2 step (IR method): expand the LAST ungrounded graph's 2.1 grounded
    # graphs into oracle candidate graphs for the selected dataset
    # (q_mode in {'graphq', 'cwq', 'lcquad'}), attach SPARQL and denotations,
    # and write one structure file per qid into output_file.
    from method_ir.grounding import graph_2_1_to_2_2_ir
    from method_sp.grounding import grounded_graph_to_sparql
    from method_sp.grounding import sparql_to_denotation
    import os
    structure_list = read_structure_file(structure_with_1_ungrounded_lcquad_file)
    error_qid_list = []  # qids whose candidate generation raised
    for _, structure in enumerate(structure_list):
        # skip questions already present in the output folder (resume support)
        if str(structure.qid) + '.json' in os.listdir(output_file):
            continue
        print(structure.qid)
        compositionality_type = structure.compositionality_type
        for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
            # only the last ungrounded graph of the forest is expanded
            if j != len(structure.ungrounded_graph_forest) - 1:
                continue
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    if q_mode == 'graphq':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_graphq(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                    elif q_mode == 'cwq':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_cwq(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                    elif q_mode == 'lcquad':
                        grounded_graph_forest.extend(graph_2_1_to_2_2_ir.get_oracle_graphs_by_2_1_graph_lcquad(
                            _2_1_grounded_graph=_2_1_grounded_graph, qtype=compositionality_type))
                except Exception as e:
                    # on failure drop all candidates of this question and move on
                    print('#Error:', structure.qid, e)
                    grounded_graph_forest.clear()
                    error_qid_list.append(structure.qid)
                    break
            for z in range(len(grounded_graph_forest)):
                # derive a unique candidate id from the ungrounded query id
                grounded_graph_forest[z].grounded_query_id = ungrounded_graph.ungrounded_query_id * 100000 + z
                grounded_graph_forest[z].sparql_query = grounded_graph_to_sparql.grounded_graph_to_sparql(
                    grounded_graph=grounded_graph_forest[z],
                    q_function=structure.function,
                    q_compositionality_type=structure.compositionality_type,
                    q_mode=q_mode)
                grounded_graph_forest[z].denotation = sparql_to_denotation.set_denotation(
                    grounded_graph=grounded_graph_forest[z],
                    q_compositionality_type=structure.compositionality_type)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
            print('#size:\t', len(grounded_graph_forest))
            if len(grounded_graph_forest) > 0:
                write_structure_file([structure], output_file + str(structure.qid) + '.json')
    print('Error qid list:', error_qid_list)
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Score every LC-QuAD grounded graph with recall / precision / F1 against gold answers."""
    from datasets_interface.question_interface import lcquad_1_0_interface
    for filename in os.listdir(input_file):
        file_path = input_file + filename
        print(filename)
        structures = read_structure_file(file_path)
        for structure in structures:
            # gold answers, e.g. ['http://dbpedia.org/resource/Colorado']
            gold_answers = lcquad_1_0_interface.get_answers_by_question(structure.question)
            for ungrounded in structure.ungrounded_graph_forest:
                for candidate in ungrounded.get_grounded_graph_forest():
                    r, p, f = sempre_evaluation.computeF1(gold_answers, set(candidate.denotation))
                    candidate.f1_score = f
                    candidate.recall_score = r
                    candidate.precision_score = p
        write_structure_file(structures, input_file + filename)
def computed_every_grounded_graph_f1_webq_mid(input_file, answer_file):
    """Score every WebQuestions grounded graph with F1 against gold answer mids.

    :param input_file: folder of structure files (one per question)
    :param answer_file: tab-separated file whose columns are
        qid <TAB> ... <TAB> python-literal list of answer mids
        (same layout as the answer_file consumed by
        computed_every_grounded_graph_f1_webq_name)
    """
    #read qid-to-answers
    # Bug fix: the original referenced qid_to_answers_dict without ever
    # building it from answer_file (the parameter was unused).
    qid_to_answers_dict = dict()
    for line in read_list(answer_file):
        cols = line.split('\t')
        # NOTE(review): eval on file content — trusted local file assumed
        qid_to_answers_dict[cols[0]] = eval(cols[2])
    all_structure_path = os.listdir(input_file)
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        structure_list = read_structure_file(structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_mid_set = evaluation_utils.search_for_answers_by_id(qid, qid_to_answers_dict)
            print(structure_path, gold_answer_mid_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_denotation_set = set(grounded_graph.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(gold_answer_mid_set, system_denotation_set)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, input_file + structure_path)
def computed_every_grounded_graph_f1_cwq(input_file):
    """Score every CWQ grounded graph with F1 against the gold answers stored on the structure."""
    for filename in os.listdir(input_file):
        file_path = input_file + filename
        print(filename)
        structures = read_structure_file(file_path)
        for structure in structures:
            gold_answers = evaluation_utils.get_gold_answers(structure.gold_answer)
            for ungrounded in structure.ungrounded_graph_forest:
                for candidate in ungrounded.get_grounded_graph_forest():
                    recall, precision, f1 = sempre_evaluation.computeF1(gold_answers, set(candidate.denotation))
                    candidate.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structures, input_file + filename)
def investigate_denotation_same():
    # Inspect denotations retrieved for each test question via its
    # best-matching train question (BERT-max mapping); prints them and dumps
    # the accumulated testqid -> train-qid denotations map to JSON.
    testqid_trainqid_bertmax = read_json(data_question_match + 'testqid_trainqid_bertmax.json')
    qmi = QuestionMatchInterface()
    structure_2_2_files = '/2.2_test_span_transfer_wo_wordlevel/'
    all_data_path = os.listdir(output_path + structure_2_2_files)
    for path in all_data_path:
        print(path)
        test_qid = path.split('.')[0]
        test_qid = 'test_' + str(test_qid)
        # if 'test_'+str(test_qid) not in testqid_trainqid_bertmax:
        if test_qid not in testqid_trainqid_bertmax:
            continue  # no matched train question for this test qid
        # structure_with_grounded_graphq_file = output_path + structure_2_2_files + path
        structure_list = read_structure_file(output_path + structure_2_2_files + path)
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # nodes of the first grounded graph stand in for the question's nodes
                nodes = []
                for groundedgraph in ungrounded_graph.get_grounded_graph_forest():
                    nodes = groundedgraph.nodes
                    break
                # print(test_qid)
                # denotation = set(qmi.get_denotation_by_testqid_nodes(test_qid, nodes))
                denotation = set(qmi.get_denotation_by_testqid_nodes_freebase(test_qid, nodes))
                print('denotations:', denotation)
                # gold_mids = set()
                # for one in structure.gold_answer:
                #     gold_mids.add(one['answer_id'])
                # if (len(denotation-gold_mids)==0 and len(gold_mids-denotation)==0):
                #     print('oh no',test_qid)
                #     if test_qid in qmunique_qids:
                #         print('double oh no')
    write_json(qmi.testqid_correspondingtrainqid_denotations,
               data_question_match + 'testqid_correspondingtrainqid_denotations.json')
def generate_testset():
    """Write the cross product of test and train abstract questions to JSON.

    Collects the abstract-question strings for all train qids (grouped by
    predicate) and all test qids, pairs every test abstract question with
    every train abstract question, and writes the pairs to ``testset.json``.
    """
    test_structures = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    predicate_to_qids = read_json(data_question_match + 'train_predicate_qids.json')
    qid_to_abstract = read_json(data_question_match + 'qid_abstractquestion.json')
    # Deduplicate abstract questions on both sides via sets.
    train_abstracts = {
        qid_to_abstract[qid]
        for qids in predicate_to_qids.values()
        for qid in qids
        if qid in qid_to_abstract
    }
    test_abstracts = {
        qid_to_abstract['test_' + str(structure.qid)]
        for structure in test_structures
        if 'test_' + str(structure.qid) in qid_to_abstract
    }
    pairs = [[test_aq, train_aq]
             for test_aq in test_abstracts
             for train_aq in train_abstracts]
    write_json(pairs, data_question_match + 'testset.json')
def computed_every_grounded_graph_f1_lcquad(input_file):
    """Score every grounded graph against LC-QuAD gold answers and persist F1.

    Same shape as the CWQ variant, but the gold answers are fetched from the
    LC-QuAD 1.0 interface by question text rather than read off the structure.

    :param input_file: directory path (with trailing separator) of structure files
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    for filename in os.listdir(input_file):
        filepath = input_file + filename
        print(filename)
        structures = read_structure_file(filepath)
        for structure in structures:
            gold_mids = lcquad_1_0_interface.get_answers_by_question(
                structure.question)
            print('#gold answer:\t', gold_mids)
            for ungrounded in structure.ungrounded_graph_forest:
                for grounded in ungrounded.get_grounded_graph_forest():
                    predicted = set(grounded.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_mids, predicted)
                    grounded.f1_score = f1
                    if f1 > 0:
                        print(f1)
        # Persist the scores back into the same file.
        write_structure_file(structures, filepath)
def compute_all_questions_recall(input_file):
    """Report oracle recall: best grounded-graph F1 per file, summed over files.

    NOTE(review): a function with this exact name also appears earlier in this
    file; whichever ``def`` executes later shadows the other — confirm which
    definition is the intended one.

    :param input_file: directory path (with trailing separator) of structure files
    """
    file_names = os.listdir(input_file)
    total_best_f1 = 0
    for name in file_names:
        best_f1 = 0
        for structure in read_structure_file(input_file + name):
            for ungrounded in structure.ungrounded_graph_forest:
                for grounded in ungrounded.get_grounded_graph_forest():
                    best_f1 = max(best_f1, grounded.f1_score)
        total_best_f1 += best_f1
        print(('%s\t%s') % (name, str(best_f1)))
    print(total_best_f1, len(file_names))
def generate_paths_graphq_interface_from_lcquad_el(
        structure_with_2_1_grounded_graph_file):
    """Print path-generation inputs for LC-QuAD using NN entity-linking results.

    For each structure, fetch the topic entities predicted by the NN linker and
    print a ``composition`` line (one entity) or a ``conjunction`` line (two
    entities); qids whose lookup raises are collected and reported at the end.

    :param structure_with_2_1_grounded_graph_file: serialized structure file path
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    structures = read_structure_file(structure_with_2_1_grounded_graph_file)
    failed_qids = []
    for structure in structures:
        try:
            entities = lcquad_1_0_interface.get_topic_entities_list_by_question_from_nn(
                structure.question)
            tagged = [[entity, 'entity'] for entity in entities]
            if len(entities) == 1:
                print(('%s\t%s\t%s') % (structure.qid, 'composition', str(tagged)))
            elif len(entities) == 2:
                print(('%s\t%s\t%s') % (structure.qid, 'conjunction', str(tagged)))
        except Exception:
            # Best-effort: remember the qid and keep going.
            failed_qids.append(structure.qid)
    print('#error:\t', failed_qids)
def run_grounding_graph_score12_match(input_file_folder, q_mode='lcquad'):
    """Attach BERT score12 path-match scores to candidate grounded graphs.

    For each structure file, scores the grounded-graph candidates of the last
    ungrounded graph with :class:`PathMatchScore12` and writes the scored
    structures back in place. A scoring failure zeroes all candidate scores.

    :param input_file_folder: directory path (with trailing separator) of files
    :param q_mode: dataset mode passed to PathMatchScore12 (default 'lcquad')
    """
    from method_ir.grounding.path_match_score12.path_match_interface import PathMatchScore12
    scorer = PathMatchScore12(q_mode)
    for filename in os.listdir(input_file_folder):
        filepath = input_file_folder + filename
        print(filename)
        structures = read_structure_file(filepath)
        for structure in structures:
            question = structure.question
            forest = structure.ungrounded_graph_forest
            for idx, ungrounded in enumerate(forest):
                # Only the last ungrounded graph carries the final candidates.
                if idx != len(forest) - 1:
                    continue
                candidates = ungrounded.get_grounded_graph_forest()
                try:
                    scores = scorer.set_bert_score_score12(
                        question_normal=question,
                        grounded_graph_forest_list=candidates)
                    for candidate, score in zip(candidates, scores):
                        candidate.score = score
                except Exception:
                    # Best-effort: failure zeroes every candidate's score.
                    for candidate in candidates:
                        candidate.score = 0.0
                    print('error')
        write_structure_file(structures, filepath)
    print('over')
def run_grounded_node_grounding_dbpedia(structure_with_ungrounded_graphq_file,
                                        output_file, linking_is_gold=False):
    """1.0 ungrounded query -> 2.1 grounded query via DBpedia entity linking.

    Links every node of the last ungrounded graph of each structure (either
    with the DBpedia node linker or with gold LC-QuAD topic entities), builds
    the 2.1 grounded graphs from the linking results, and writes the enriched
    structures to *output_file*.

    :param structure_with_ungrounded_graphq_file: input structure file path
    :param output_file: destination path for the grounded structures
    :param linking_is_gold: if True, use gold topic entities instead of the linker
    """
    from datasets_interface.question_interface import lcquad_1_0_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_dbpedia
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structures = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structures:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        for idx, ungrounded in enumerate(forest):
            # Only the last ungrounded graph is grounded.
            if idx != len(forest) - 1:
                continue
            linking_results = []
            for node in ungrounded.nodes:
                if linking_is_gold:
                    gold = lcquad_1_0_interface.get_topic_entities_by_question_and_mention(
                        question=structure.question, mention=node.friendly_name)
                    linking_results.append((node, gold))
                else:
                    linking_results.append(
                        (node, node_linking_interface_dbpedia.node_linking(node=node)))
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded,
                grounding_result_list=linking_results)
            ungrounded.set_grounded_linking(linking_results)
            ungrounded.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structures, output_file)