def set_model_data(self):
    """Load the pre-trained question-match model and its word resources.

    Relies on the module-level ``mode`` flag ('cwq' or 'graphq') to pick
    both the model checkpoint and the KB-specific relation/type word list.
    """
    assert mode in ['cwq', 'graphq']
    if mode == 'cwq':
        # Checkpoint filename encodes the training iteration and dev-F1 it was saved at.
        model_file = fn_cwq_file.model_file + "_iter_{}_devf1_{}_model.pt".format(
            2720, 52)
        self.relortype_level_word = read_json(
            kb_freebase_latest_file.dataset + "relortype_level_words.json")
    elif mode == 'graphq':
        model_file = fn_graph_file.model_file + "_iter_{}_devf1_{}_model.pt".format(
            570, 48)
        self.relortype_level_word = read_json(kb_freebase_en_2013.dataset +
                                              "relortype_level_words.json")
    if self.model_parameters.gpu >= 0:
        # Remap every stored tensor onto the configured GPU at load time.
        self.model = torch.load(model_file,
                                map_location=lambda storage, location: storage.cuda(self.model_parameters.gpu))
    else:
        # CPU-only load: keep tensors on the CPU storage torch deserializes into.
        self.model = torch.load(
            model_file, map_location=lambda storage, location: storage)
    self.model.eval()  # inference only: disables dropout / batch-norm updates
    self.wem = WordEmbedding()
    # self.pretrained_embedding = torch.load(self.model_parameters.vector_cache_file)
    # self.word_dict = torch.load(self.model_parameters.word_dict_file)
    # self.word_pair_sim = torch.load(fn_cwq_file.question_match_dir + 'word_pair_sim.pt')
    # self.pad_index = self.word_dict.lookup(self.word_dict.pad_token)
    # NOTE(review): the pairwise-similarity cache starts empty here; presumably
    # it is filled lazily elsewhere -- confirm against the callers.
    self.word_pair_sim = dict()
def __init__(self):
    """Load the CWQ training BGP graphs plus the cached BERT question-match files."""
    if mode == 'cwq':
        match_dir = fn_cwq_file.question_match_dir
        # Gold grounded graphs for every training question, parsed from the BGP dump.
        self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
            globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir)
        # Best-matching training qid per test qid, as scored by BERT.
        self.testqid_trainqid_bertmax = read_json(
            match_dir + 'testqid_trainqid_bertmax.json')
        # Cached denotations of the matched training queries.
        self.testqid_correspondingtrainqid_denotations = read_json(
            match_dir + 'testqid_correspondingtrainqid_denotations.json')
def generate_trainset():
    """Build (question, question, label) pairs for the question-match model.

    Questions grouped under the same predicate signature form positive pairs
    (label 1); for each positive pair, 50 randomly drawn questions from other
    predicate groups form negatives (label 0). Writes 'trainset.json'.
    """
    train_predicate_qids = read_json(data_question_match + 'train_predicate_qids.json')
    qid_abstractquestions = read_json(data_question_match + 'qid_abstractquestion.json')

    # Every abstract question that belongs to at least one training qid.
    abstractquestion_all = {
        qid_abstractquestions[qid]
        for qids in train_predicate_qids.values()
        for qid in qids
        if qid in qid_abstractquestions
    }

    trainset = []
    for k, predicate in enumerate(train_predicate_qids):
        print(k, predicate)
        # Abstract questions sharing this predicate signature (positive pool).
        same_abstractquestions = {
            qid_abstractquestions[qid]
            for qid in train_predicate_qids[predicate]
            if qid in qid_abstractquestions
        }
        # Everything outside the positive pool is a negative candidate.
        residu_abstractquestions = list(abstractquestion_all - same_abstractquestions)
        # Cap the positives per predicate to bound the number of pairs.
        same_abstractquestions = list(same_abstractquestions)[:10]
        for current in same_abstractquestions:
            for gold in same_abstractquestions:
                if current == gold:
                    continue
                # Re-shuffle per positive pair so each gets different negatives.
                random.shuffle(residu_abstractquestions)
                trainset.append([current, gold, 1])
                for neg in residu_abstractquestions[:50]:
                    trainset.append([current, neg, 0])
    write_json(trainset, data_question_match + 'trainset.json')
def reverse(path):
    """Invert a ``{key: [values]}`` JSON mapping into ``{value: key}``.

    If the same value appears under several keys, the key encountered
    last (in the file's key order) wins -- matching plain re-assignment.
    """
    data = read_json(path)
    return {val: key for key, vals in data.items() for val in vals}
def generate_testset():
    """Pair every test abstract question with every train abstract question.

    Produces the full cross product used for BERT scoring and writes it to
    'testset.json'.
    """
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    train_predicate_qids = read_json(data_question_match + 'train_predicate_qids.json')
    qid_abstractquestions = read_json(data_question_match + 'qid_abstractquestion.json')

    # All abstract questions reachable from any training qid.
    train_abstractquestion = {
        qid_abstractquestions[qid]
        for qids in train_predicate_qids.values()
        for qid in qids
        if qid in qid_abstractquestions
    }
    # Abstract questions of the test structures (qids are 'test_'-prefixed).
    test_abstractquestions = {
        qid_abstractquestions['test_' + str(one.qid)]
        for one in test_2_1
        if 'test_' + str(one.qid) in qid_abstractquestions
    }
    # Full cross product: every (test, train) abstract-question pair.
    testset = [
        [abstractquestion, ta]
        for abstractquestion in test_abstractquestions
        for ta in train_abstractquestion
    ]
    write_json(testset, data_question_match + 'testset.json')
def __init__(self):
    """Load training BGP graphs and cached BERT match files for the active mode.

    ``mode`` is a module-level flag: 'cwq' and 'webq' use the Freebase BGP
    parser, 'lcquad' the DBpedia one. Each branch populates the same three
    attributes so downstream code is mode-agnostic.
    """
    if mode == 'cwq':
        self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
            globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir)
        self.testqid_trainqid_bertmax = read_json(
            fn_cwq_file.question_match_dir + 'testqid_trainqid_bertmax.json')
        self.testqid_correspondingtrainqid_denotations = read_json(
            fn_cwq_file.question_match_dir + 'testqid_correspondingtrainqid_denotations.json')
    elif mode == 'lcquad':
        self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_dbpedia(
            fn_lcquad_file.lcquad_train_bgp_dir)
        self.testqid_trainqid_bertmax = read_json(
            fn_lcquad_file.question_match_dir + 'testqid_trainqid_bertmax.json')
        self.testqid_correspondingtrainqid_denotations = read_json(
            fn_lcquad_file.question_match_dir + 'testqid_correspondingtrainqid_denotations.json')
        # self.testqid_correspondingtrainqid_denotations={}
    elif mode == 'webq':
        resources_webq = root + '/resources_webq/'
        data_question_match = resources_webq + 'data_question_match/'
        # BUG FIX: the original path used backslashes ('\dataset_questions\webquestions'),
        # which are invalid escape sequences (\d, \w) -- a SyntaxWarning today and a
        # SyntaxError in future Python -- and break on non-Windows systems. Forward
        # slashes match every other path in this branch and also work on Windows.
        train_webq_bgp_filepath = root + '/dataset_questions/webquestions/2019.06.04_wsp_train_bgp.txt'
        self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
            train_webq_bgp_filepath)
        self.testqid_trainqid_bertmax = read_json(
            data_question_match + 'testqid_trainqid_bertmax.json')
        self.testqid_correspondingtrainqid_denotations = read_json(
            data_question_match + 'testqid_correspondingtrainqid_denotations.json')
def _convert_file_to_oracle_graphs(file_path, question_type, entities_or_literals, is_constraint_mediator=False):
    """Read one cached oracle-path file and convert it into grounded graphs.

    Dispatches on ``question_type`` ('composition', 'conjunction', 'ask') and
    on the KB behind ``grounding_args.q_mode`` (Freebase for cwq/graphq,
    DBpedia for lcquad). Unmatched combinations yield an empty result.
    """
    data_dict = read_json(grounding_args.oracle_file_root + file_path)
    kb_mode = grounding_args.q_mode
    queries = []
    if question_type == 'composition':
        # Single anchor: first (value, type) pair only.
        anchor = entities_or_literals[0]
        if kb_mode in ['cwq', 'graphq']:
            queries = path_to_graph_bgp.parser_composition_q_freebase_sp(
                data_dict=data_dict,
                s1=anchor[0],
                t1=anchor[1],
                is_constraint_mediator=is_constraint_mediator)
        elif kb_mode in ['lcquad']:
            queries = path_to_graph_bgp.parser_composition_q_dbpedia_sp(
                data_dict=data_dict,
                s1=anchor[0],
                t1=anchor[1],
                is_constraint_mediator=is_constraint_mediator)
    elif question_type == 'conjunction':
        # Two anchors joined on the answer variable.
        if kb_mode in ['cwq', 'graphq']:
            queries = path_to_graph_bgp.parser_conjunction_q_freebase(
                data_dict=data_dict,
                s1=entities_or_literals[0][0], t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0], t2=entities_or_literals[1][1])
        elif kb_mode in ['lcquad']:
            queries = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                data_dict=data_dict,
                s1=entities_or_literals[0][0], t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0], t2=entities_or_literals[1][1])
    elif question_type == 'ask':
        # Boolean (yes/no) questions exist only in the lcquad setting.
        if kb_mode in ['lcquad']:
            queries = path_to_graph_bgp.parser_yesno_q_dbpedia(
                data_dict=data_dict,
                s1=entities_or_literals[0][0], t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0], t2=entities_or_literals[1][1])
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=queries)
def _get_oracle_graphs_comparative(_2_1_grounded_graph):
    """Ground a comparative question from per-entity oracle dump files.

    Collects the entity-typed nodes of the 2.1 graph, looks up the
    'comparative_entity_<mid>' oracle file for each, and converts all
    retrieved path records into grounded graphs.

    Fix: the original also collected literal-typed node ids into a list
    that was never read; that dead code is removed.
    """
    anchor_entities_list = []
    for node in _2_1_grounded_graph.nodes:
        if node.node_type == 'entity':
            anchor_entities_list.append(node.id)
    candidate_graphquerys = []
    for entity in anchor_entities_list:
        print('#anchor:\t', entity)
        filename_1 = 'comparative_entity_' + entity
        # Only entities for which an oracle dump exists contribute candidates.
        if filename_1 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            candidate_graphquerys.extend(
                path_to_graph_comparative.parser_comparative_q_freebase_ir(
                    data_dict=data_dict, s1=entity, t1='entity'))
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
def investigate_denotation_same():
    """Print, for each matched test question, the denotation obtained by
    transferring the best-matching training query, then persist the
    accumulated denotation cache.

    Walks the 2.2-stage structure files; questions without a BERT match
    are skipped.
    """
    testqid_trainqid_bertmax = read_json(data_question_match + 'testqid_trainqid_bertmax.json')
    qmi = QuestionMatchInterface()
    structure_2_2_files = '/2.2_test_span_transfer_wo_wordlevel/'
    all_data_path = os.listdir(output_path + structure_2_2_files)
    for path in all_data_path:
        print(path)
        # File names are '<qid>.<ext>'; keys in the match table are 'test_'-prefixed.
        test_qid = path.split('.')[0]
        test_qid = 'test_' + str(test_qid)
        # if 'test_'+str(test_qid) not in testqid_trainqid_bertmax:
        if test_qid not in testqid_trainqid_bertmax:
            continue
        # structure_with_grounded_graphq_file = output_path + structure_2_2_files + path
        structure_list = read_structure_file(output_path + structure_2_2_files + path)
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                nodes = []
                # Only the first grounded graph's nodes are used (break after one).
                for groundedgraph in ungrounded_graph.get_grounded_graph_forest():
                    nodes = groundedgraph.nodes
                    break
                # print(test_qid)
                # denotation = set(qmi.get_denotation_by_testqid_nodes(test_qid, nodes))
                denotation = set(
                    qmi.get_denotation_by_testqid_nodes_freebase(
                        test_qid, nodes))
                print('denotations:', denotation)
                # gold_mids = set()
                # for one in structure.gold_answer:
                #     gold_mids.add(one['answer_id'])
                #
                # if (len(denotation-gold_mids)==0 and len(gold_mids-denotation)==0):
                #     print('oh no',test_qid)
                #     if test_qid in qmunique_qids:
                #         print('double oh no')
    # Persist the denotation cache the interface accumulated during the walk.
    write_json(
        qmi.testqid_correspondingtrainqid_denotations,
        data_question_match + 'testqid_correspondingtrainqid_denotations.json')
def _get_2_2_graphs_by_type_and_literals(question_type, entities_or_literals, is_constraint_mediator=False):
    """Ground a literal-anchored question using pre-dumped oracle path files.

    :param question_type: 'conjunction' (two anchors) or 'composition' (one anchor)
    :param entities_or_literals: list of (value, 'entity'|'literal') pairs
    :param is_constraint_mediator: forwarded to the composition parsers
    :return: grounded graphs from the matching oracle file; empty when the
        literal is unknown to ``literal_to_id_map`` or no oracle file exists
    """
    candidate_graphquerys = []
    if len(entities_or_literals) == 2:  # one literal, one entity
        literal_value = None
        entity_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
            else:
                entity_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        # Normalize the literal the same way the oracle dump did before the id lookup.
        literal_value = grounding_utils.literal_postprocess(
            literal_value, q_mode=grounding_args.q_mode)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            # Unknown literal -> no oracle file can exist for it.
            return []
        # Oracle files may have been dumped with either anchor order; try both.
        filename_1 = question_type + '_entity_' + entity_value + '_literal_' + literal_value_id
        filename_2 = question_type + '_literal_' + literal_value_id + '_entity_' + entity_value
        if filename_1 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'conjunction':
                if grounding_args.q_mode in ['cwq', 'graphq']:
                    candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_freebase(
                        data_dict=data_dict, s1=entity_value, t1='entity',
                        s2=literal_value_id, t2='literal')
                elif grounding_args.q_mode in ['lcquad']:
                    candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                        data_dict=data_dict, s1=entity_value, t1='entity',
                        s2=literal_value_id, t2='literal')
        elif filename_2 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_2)
            # NOTE(review): unlike the filename_1 branch, this branch has no
            # question_type == 'conjunction' guard -- confirm this is intentional.
            if grounding_args.q_mode in ['cwq', 'graphq']:
                candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_freebase(
                    data_dict=data_dict, s1=literal_value_id, t1='literal',
                    s2=entity_value, t2='entity')
            elif grounding_args.q_mode in ['lcquad']:
                candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                    data_dict=data_dict, s1=literal_value_id, t1='literal',
                    s2=entity_value, t2='entity')
    elif len(entities_or_literals) == 1:
        literal_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        literal_value = grounding_utils.literal_postprocess(
            literal_value, q_mode=grounding_args.q_mode)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return []
        filename_1 = question_type + '_literal_' + literal_value_id
        if filename_1 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'composition':
                # NOTE(review): the post-processed literal itself (not its id) is
                # passed as s1 here, unlike the two-anchor branch -- confirm.
                if grounding_args.q_mode in ['cwq', 'graphq']:
                    candidate_graphquerys = path_to_graph_bgp.parser_composition_q_freebase_sp(
                        data_dict=data_dict, s1=literal_value, t1='literal',
                        is_constraint_mediator=is_constraint_mediator)
                elif grounding_args.q_mode in ['lcquad']:
                    candidate_graphquerys = path_to_graph_bgp.parser_composition_q_dbpedia_sp(
                        data_dict=data_dict, s1=literal_value, t1='literal',
                        is_constraint_mediator=is_constraint_mediator)
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
GroundedEdge(start=edge["start"], end=edge["end"], relation=edge["relation"], friendly_name=edge["friendly_name"], score=1.0)) graphq.sparql_query = questionAnnotation["sparql_query"] graphq.parsed_sparql = questionAnnotation['parsed_sparql'] graphquestionsList.append(graphq) return graphquestionsList test_graph_questions_struct = read_graph_question_json( globals_args.fn_graph_file.graphquestions_testing_dir) train_graph_questions_struct = read_graph_question_json( globals_args.fn_graph_file.graphquestions_training_dir) annotation_node_questions_json = read_json( globals_args.fn_graph_file.graphquestions_node_ann_dir) def get_answers_by_question(question=None): answers = [] for data_ann in test_graph_questions_struct: if data_ann.question == question: answers = data_ann.answer break for data_ann in train_graph_questions_struct: if data_ann.question == question: answers = data_ann.answer return answers def get_answers_mid_by_question(question=None):
def generate_predicate_qids(): train_qid_to_grounded_graph_dict = questions_utils.extract_grounded_graph_from_jena_freebase( train_cwq_bgp_filepath) # dev_qid_to_grounded_graph_dict = questions_utils.extract_grounded_graph_from_jena_freebase(dev_cwq_bgp_filepath) test_qid_to_grounded_graph_dict = questions_utils.extract_grounded_graph_from_jena_freebase( test_cwq_bgp_filepath) qid_abstractquestions = read_json(data_question_match + 'qid_abstractquestion.json') train_predicate_qids = collections.defaultdict(list) for qid, grounded_graph in train_qid_to_grounded_graph_dict.items(): # qid='train_'+str(qid.split('-')[1]) qid = 'train_' + qid if qid not in qid_abstractquestions: continue predicates = [] for edge in grounded_graph.edges: predicates.append(edge.friendly_name) predicates.sort() predicate = '\t'.join(predicates) # print(qid) if len(qid_abstractquestions[qid]) > 0: # print('hi',qid) # abstractquestion = qid_abstractquestions[qid] train_predicate_qids[predicate].append(qid) write_json(train_predicate_qids, data_question_match + 'train_predicate_qids.json') test_predicate_qids = collections.defaultdict(list) for qid, grounded_graph in test_qid_to_grounded_graph_dict.items(): # qid = 'test_' + str(qid.split('-')[1]) qid = 'test_' + qid if qid not in qid_abstractquestions: continue predicates = [] for edge in grounded_graph.edges: predicates.append(edge.friendly_name) predicates.sort() predicate = '\t'.join(predicates) if len(qid_abstractquestions[qid]) > 0: # abstractquestion = qid_abstractquestions[qid] test_predicate_qids[predicate].append(qid) write_json(test_predicate_qids, data_question_match + 'test_predicate_qids.json') # dev_predicate_qids = collections.defaultdict(list) # for qid, grounded_graph in dev_qid_to_grounded_graph_dict.items(): # # qid = 'dev_' + str(qid.split('-')[1]) # qid='dev_' + qid # if qid not in qid_abstractquestions: # continue # predicates = [] # for edge in grounded_graph.edges: # predicates.append(edge.friendly_name) # 
predicates.sort() # predicate = '\t'.join(predicates) # if len(qid_abstractquestions[qid]) > 0: # # abstractquestion = qid_abstractquestions[qid] # dev_predicate_qids[predicate].append(qid) # write_json(dev_predicate_qids, data_question_match + 'dev_predicate_qids.json') num_intersect = 0 # 2718 for predicate in test_predicate_qids: if predicate in train_predicate_qids: num_intersect += len(test_predicate_qids[predicate]) print(num_intersect)
def score_testquestion_bert():
    """Rank training questions for each test question by BERT pair score.

    Reads the BERT scoring log, maps every (test, train) abstract-question
    pair to its probability, sorts the training candidates per test qid, and
    writes both the full ranking and the top-1 match to disk.
    """
    def reverse(path):
        # Invert {predicate: [qids]} into {qid: predicate}.
        # NOTE(review): duplicates the module-level reverse() helper -- could be shared.
        data = read_json(path)
        res = dict()
        for key in data:
            for val in data[key]:
                res[val] = key
        return res

    # Earlier log format (columns shifted by one); kept for reference.
    # def read_abstractquestionpair_pro():
    #     diction = dict()
    #     with open(data_question_match + '09_03_cwq_test_gpu.log', 'r') as f: #'05_10_test.log'
    #         mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    #         line = mm.readline()
    #         while line:
    #             cols = line.decode().strip().split('\t')
    #             abstractquestion_pair = '\t'.join([cols[0], cols[1]])
    #             if float(cols[3]) > 0:
    #                 diction[abstractquestion_pair] = float(cols[3])
    #             line = mm.readline()
    #         mm.close()
    #         f.close()
    #         return

    def read_abstractquestionpair_pro():
        # Parse the BERT scoring log: columns 1 and 2 hold the question pair,
        # column 4 its score; only positive scores are kept.
        diction = dict()
        with open(data_question_match + '09_03_cwq_test_gpu.log', 'r') as f:  #'05_10_test.log'
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            line = mm.readline()
            while line:
                cols = line.decode().strip().split('\t')
                abstractquestion_pair = '\t'.join([cols[1], cols[2]])
                if float(cols[4]) > 0:
                    diction[abstractquestion_pair] = float(cols[4])
                line = mm.readline()
            mm.close()
            f.close()
            return diction

    abstractquestionpair_pro = read_abstractquestionpair_pro()
    # print(abstractquestionpair_pro)
    testqid_trainqidmax = dict()
    test_qid_trainqid_pro = dict()
    qid_abstractquestion = read_json(data_question_match + 'qid_abstractquestion.json')
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    train_2_1 = read_structure_file(
        train_structure_with_2_1_grounded_graph_file)
    test_qid_predicate = reverse(data_question_match + 'test_predicate_qids.json')
    train_qid_predicate = reverse(data_question_match + 'train_predicate_qids.json')
    for one in test_2_1:
        qid = 'test_' + str(one.qid)
        print(qid)
        if qid not in qid_abstractquestion:
            continue
        abstractquestion = qid_abstractquestion[qid]
        # Score every training question against this test question.
        trainqid_pro = dict()
        for train_one in train_2_1:
            train_one_qid = 'train_' + str(train_one.qid)
            if train_one_qid not in qid_abstractquestion:
                continue
            train_abstractquestion = qid_abstractquestion[train_one_qid]
            if '\t'.join([abstractquestion, train_abstractquestion]) in abstractquestionpair_pro:
                # print('\t'.join([abstractquestion,train_abstractquestion]))
                sim = abstractquestionpair_pro[('\t'.join(
                    [abstractquestion, train_abstractquestion]))]
                trainqid_pro[train_one_qid] = float(sim)
        # Highest-scoring candidates first.
        trainqid_pro = dict(
            sorted(trainqid_pro.items(), key=lambda d: d[1], reverse=True))
        if len(trainqid_pro) == 0:
            continue
        # Diagnostic: report when the top match shares the test question's predicate.
        if qid in test_qid_predicate:
            if list(trainqid_pro.keys())[0] in train_qid_predicate:
                if test_qid_predicate[qid] == train_qid_predicate[list(
                        trainqid_pro.keys())[0]]:
                    print('yeah')
        test_qid_trainqid_pro[qid] = trainqid_pro
        if len(list(trainqid_pro.keys())) > 0:
            testqid_trainqidmax[qid] = list(trainqid_pro.keys())[0]
    # NOTE(review): no '.json' extension on this output, unlike the others -- confirm intentional.
    write_json(test_qid_trainqid_pro,
               data_question_match + 'test_qid_trainqid_pro_bert')
    write_json(testqid_trainqidmax,
               data_question_match + 'testqid_trainqid_bertmax.json')
from datasets_interface.virtuoso_interface import freebase_kb_interface
from common.hand_files import read_json, write_json
from common.globals_args import fn_cwq_file

# Process-wide cache mapping a Freebase mid to its {answer_id, answer, aliases}
# record, seeded from the on-disk JSON cache.
mid_to_names_dict = read_json(fn_cwq_file.cache_mid_to_names)


def get_names(instance_str):
    """Return the {answer_id, answer, aliases} record for *instance_str*.

    Looks the mid up in the in-memory cache first; otherwise queries the
    Freebase KB for names and aliases and stores the result in the cache
    (persist it with write_cache_json()).
    """
    if instance_str in mid_to_names_dict:
        mid_dict = mid_to_names_dict[instance_str]
    else:
        mid_dict = dict()
        mid_dict['answer_id'] = instance_str
        if isinstance(instance_str, str):
            # mid = 'm.02hwgbx'
            labels = freebase_kb_interface.get_names(instance_str)
            mid_dict['answer'] = list(labels)
            alias = freebase_kb_interface.get_alias(instance_str)
            mid_dict['aliases'] = list(alias)
        else:
            # Non-string answers (numbers/dates) are their own name and alias.
            mid_dict['answer'] = [instance_str]
            mid_dict['aliases'] = [instance_str]
    mid_to_names_dict[instance_str] = mid_dict
    return mid_dict


def write_cache_json():
    """Persist the in-memory name cache back to its JSON file."""
    write_json(mid_to_names_dict, fn_cwq_file.cache_mid_to_names)


if __name__ == '__main__':
    cwq_prediction_test_json = read_json(
def get_2_2_graphs_by_type_and_literals(question_type, entities_or_literals):
    """Ground a literal-anchored question from oracle dump files (cwq/graphq variant).

    :param question_type: 'conjunction' (two anchors) or 'composition' (one anchor);
        also the prefix of the oracle file name
    :param entities_or_literals: list of (value, 'entity'|'literal') pairs
    :return: grounded graphs from the matching oracle file; empty when the
        literal is unknown to ``literal_to_id_map`` or no oracle file exists
    """
    result = []
    candidate_graphquerys = []
    if len(entities_or_literals) == 2:  # one literal, one entity
        literal_value = None
        entity_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
            else:
                entity_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            # Unknown literal -> no oracle file can exist for it.
            return result
        # Oracle files may have been dumped with either anchor order; try both.
        filename_1 = question_type
        filename_1 += '_entity_' + entity_value
        filename_1 += '_literal_' + literal_value_id
        filename_2 = question_type
        filename_2 += '_literal_' + literal_value_id
        filename_2 += '_entity_' + entity_value
        if filename_1 in grounding_args.oracle_all_files_path_names:  #all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'conjunction':
                if grounding_args.q_mode == 'cwq':
                    candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                        data_dict=data_dict, s1=entity_value, t1='entity',
                        s2=literal_value_id, t2='literal')
                elif grounding_args.q_mode == 'graphq':
                    candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                        data_dict=data_dict, s1=entity_value, t1='entity',
                        s2=literal_value_id, t2='literal')
        elif filename_2 is not None and filename_2 in grounding_args.oracle_all_files_path_names:  #all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root + filename_2)  #file_result
            # NOTE(review): unlike the filename_1 branch, no question_type ==
            # 'conjunction' guard here -- confirm this is intentional.
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                    data_dict=data_dict, s1=literal_value_id, t1='literal',
                    s2=entity_value, t2='entity')
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                    data_dict=data_dict, s1=literal_value_id, t1='literal',
                    s2=entity_value, t2='entity')
    elif len(entities_or_literals) == 1:
        literal_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return result
        filename_1 = question_type
        filename_1 += '_literal_' + literal_value_id
        if filename_1 in grounding_args.oracle_all_files_path_names:  # all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'composition':
                # NOTE(review): passes the literal itself (not its id) as s1 here,
                # unlike the two-anchor branch -- confirm intentional.
                if grounding_args.q_mode == 'cwq':
                    candidate_graphquerys = path_to_graph.parser_composition_q_cwq_(
                        data_dict=data_dict, s1=literal_value, t1='literal')
                elif grounding_args.q_mode == 'graphq':
                    candidate_graphquerys = path_to_graph.parser_composition_q_graphq(
                        data_dict=data_dict, s1=literal_value, t1='literal')
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
def _get_2_2_graphs_by_structure_and_type_only_entities(
        question_type=None, entities_or_literals=None, _2_1_graph=None, constraint='0'):
    """Ground an entity-only question from oracle dump files named after its anchors.

    :param question_type: 'composition' or 'conjunction'; also the filename prefix
    :param entities_or_literals: one or two (value, type) anchor pairs
    :param _2_1_graph: not read in this function; presumably kept for caller
        signature compatibility -- confirm before removing
    :param constraint: '1' -> treat the composition as mediator-constrained
    """
    filename_1 = question_type
    filename_2 = None
    if len(entities_or_literals) == 1:
        filename_1 += '_' + entities_or_literals[0][
            1] + '_' + entities_or_literals[0][0]
    elif len(entities_or_literals) == 2:
        filename_1 += '_' + entities_or_literals[0][
            1] + '_' + entities_or_literals[0][0]
        filename_1 += '_' + entities_or_literals[1][
            1] + '_' + entities_or_literals[1][0]
        # Oracle files may have been dumped with the anchors in the opposite order.
        filename_2 = question_type
        filename_2 += '_' + entities_or_literals[1][
            1] + '_' + entities_or_literals[1][0]
        filename_2 += '_' + entities_or_literals[0][
            1] + '_' + entities_or_literals[0][0]
    candidate_graphquerys = []
    if filename_1 in grounding_args.oracle_all_files_path_names:
        data_dict = read_json(grounding_args.oracle_file_root + filename_1)
        if question_type == 'composition':
            if constraint == '1':
                is_constraint = True
            else:
                is_constraint = False
            # NOTE(review): 'constaint' (sic) below must match the parser's keyword
            # parameter name; rename both together or not at all.
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_composition_q_cwq_(
                    data_dict=data_dict,
                    s1=entities_or_literals[0][0],
                    t1=entities_or_literals[0][1],
                    constaint=is_constraint)
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_composition_q_graphq(
                    data_dict=data_dict,
                    s1=entities_or_literals[0][0],
                    t1=entities_or_literals[0][1],
                    constaint=is_constraint)
        elif question_type == 'conjunction':
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                    data_dict=data_dict,
                    s1=entities_or_literals[0][0],
                    t1=entities_or_literals[0][1],
                    s2=entities_or_literals[1][0],
                    t2=entities_or_literals[1][1])
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                    data_dict=data_dict,
                    s1=entities_or_literals[0][0],
                    t1=entities_or_literals[0][1],
                    s2=entities_or_literals[1][0],
                    t2=entities_or_literals[1][1])
    elif filename_2 is not None and filename_2 in grounding_args.oracle_all_files_path_names:
        data_dict = read_json(grounding_args.oracle_file_root + filename_2)
        # Anchor order swapped to mirror the reversed filename.
        if grounding_args.q_mode == 'cwq':
            candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                data_dict=data_dict,
                s1=entities_or_literals[1][0],
                t1=entities_or_literals[1][1],
                s2=entities_or_literals[0][0],
                t2=entities_or_literals[0][1])
        elif grounding_args.q_mode == 'graphq':
            candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                data_dict=data_dict,
                s1=entities_or_literals[1][0],
                t1=entities_or_literals[1][1],
                s2=entities_or_literals[0][0],
                t2=entities_or_literals[0][1])
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
all_complexwebq_list = read_complexwebq_question_json( globals_args.fn_cwq_file.complexwebquestion_all_questions_dir) complexwebq_test_list = read_complexwebq_question_json( globals_args.fn_cwq_file.complexwebquestion_test_dir) complexwebq_dev_list = read_complexwebq_question_json( globals_args.fn_cwq_file.complexwebquestion_dev_dir) complexwebq_train_list = read_complexwebq_question_json( globals_args.fn_cwq_file.complexwebquestion_train_dir) bgp_test_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase( globals_args.fn_cwq_file.complexwebquestion_test_bgp_dir) bgp_dev_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase( globals_args.fn_cwq_file.complexwebquestion_dev_bgp_dir) bgp_train_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase( globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir) annotation_node_questions_json = read_json( globals_args.fn_cwq_file.complexwebquestion_node_ann_dir) def get_answers_by_question(question_normal=None): ''' "answers": [ { "answer": "Super Bowl XLVII", "answer_id": "m.0642vqv", "aliases": [ "Super Bowl 2013", "Super Bowl 47" ] } ], :param question_normal:
lcquad_annotation.verbalized_question = question_ann[ 'intermediary_question'] lcquad_annotation.qid = question_ann['_id'] lcquad_list.append(lcquad_annotation) return lcquad_list lcquad_test_list = read_train_test_data( filepath=fn_lcquad_file.lcquad_test_dir) lcquad_train_list = read_train_test_data( filepath=fn_lcquad_file.lcquad_train_dir) bgp_test_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_dbpedia( fn_lcquad_file.lcquad_test_bgp_dir) bgp_train_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_dbpedia( fn_lcquad_file.lcquad_train_bgp_dir) annotation_node_answers_all_questions_json = read_json( fn_lcquad_file.lcquad_all_q_node_ann_dir) def get_answers_by_question(question=None): answers = [] for data_ann in annotation_node_answers_all_questions_json: if data_ann['question_normal'] == question: answers = data_ann['answers'] return answers def get_type_by_question(question=None): question_type = 'bgp' for data_ann in annotation_node_answers_all_questions_json: if data_ann['question_normal'] == question: question_type = data_ann['type']
from datasets_interface.virtuoso_interface import freebase_kb_interface
from common.hand_files import read_json, write_json
from common.globals_args import fn_graph_file

# In-memory mid -> {answer_id, answer, aliases} cache, seeded from disk.
mid_to_names_dict = read_json(fn_graph_file.cache_mid_to_names)


def get_names(instance_str):
    """Resolve *instance_str* to its {answer_id, answer, aliases} record.

    Cache hits are returned directly; misses query the Freebase KB for
    names and aliases, logging both, and are memoized in the cache
    (flush it with write_cache_json()).
    """
    if instance_str in mid_to_names_dict:
        return mid_to_names_dict[instance_str]
    mid_dict = {'answer_id': instance_str}
    if isinstance(instance_str, str):
        # mid = 'm.02hwgbx'
        labels = freebase_kb_interface.get_names(instance_str)
        print('#labels:\t', instance_str, labels)
        mid_dict['answer'] = list(labels)
        alias = freebase_kb_interface.get_alias(instance_str)
        print('#alias:\t', instance_str, alias)
        mid_dict['aliases'] = list(alias)
    else:
        # Non-string answers (numbers/dates) are their own name and alias.
        mid_dict['answer'] = [instance_str]
        mid_dict['aliases'] = [instance_str]
    mid_to_names_dict[instance_str] = mid_dict
    return mid_dict


def write_cache_json():
    """Flush the in-memory name cache back to its JSON file."""
    write_json(mid_to_names_dict, fn_graph_file.cache_mid_to_names)