Example #1
0
    def set_model_data(self):
        """Load the trained question-match model and relation/type word lists
        for the dataset selected by the module-level ``mode`` flag.

        Side effects: sets ``self.model`` (switched to eval mode),
        ``self.relortype_level_word``, ``self.wem`` and ``self.word_pair_sim``.
        """
        # NOTE(review): `assert` is stripped under `python -O`; raise a
        # ValueError instead if this check must survive optimized runs.
        assert mode in ['cwq', 'graphq']
        if mode == 'cwq':
            # Checkpoint is identified by training iteration and dev F1
            # embedded in the file name.
            model_file = fn_cwq_file.model_file + "_iter_{}_devf1_{}_model.pt".format(
                2720, 52)
            self.relortype_level_word = read_json(
                kb_freebase_latest_file.dataset + "relortype_level_words.json")
        elif mode == 'graphq':
            model_file = fn_graph_file.model_file + "_iter_{}_devf1_{}_model.pt".format(
                570, 48)
            self.relortype_level_word = read_json(kb_freebase_en_2013.dataset +
                                                  "relortype_level_words.json")

        # map_location moves the checkpoint tensors onto the configured GPU,
        # or keeps them on CPU when gpu < 0.
        if self.model_parameters.gpu >= 0:
            self.model = torch.load(model_file,
                                    map_location=lambda storage, location:
                                    storage.cuda(self.model_parameters.gpu))
        else:
            self.model = torch.load(
                model_file, map_location=lambda storage, location: storage)
        self.model.eval()
        self.wem = WordEmbedding()
        # self.pretrained_embedding = torch.load(self.model_parameters.vector_cache_file)
        # self.word_dict = torch.load(self.model_parameters.word_dict_file)
        # self.word_pair_sim = torch.load(fn_cwq_file.question_match_dir + 'word_pair_sim.pt')
        # self.pad_index = self.word_dict.lookup(self.word_dict.pad_token)
        self.word_pair_sim = dict()
Example #2
0
 def __init__(self):
     """Load train grounded graphs and precomputed BERT question-match
     results for the cwq dataset.

     ``mode`` is a module-level flag; any other mode leaves the three
     attributes unset.
     """
     if mode == 'cwq':
         self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
             globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir)
         # test qid -> best-matching train qid under the BERT matcher.
         self.testqid_trainqid_bertmax = read_json(
             fn_cwq_file.question_match_dir +
             'testqid_trainqid_bertmax.json')
         # test qid -> denotations taken over from its matched train qid.
         self.testqid_correspondingtrainqid_denotations = read_json(
             fn_cwq_file.question_match_dir +
             'testqid_correspondingtrainqid_denotations.json')
def generate_trainset():
    """Build and persist a question-match training set.

    For every predicate signature, abstract questions sharing that predicate
    are paired as positives (label 1); each positive pair is augmented with
    50 shuffled negatives (label 0) drawn from abstract questions of other
    predicates.  Writes ``trainset.json`` under ``data_question_match``.

    Reads: train_predicate_qids.json, qid_abstractquestion.json.
    Writes: trainset.json (list of [question, candidate, label]).
    """
    train_predicate_qids = read_json(data_question_match +
                                     'train_predicate_qids.json')
    qid_abstractquestions = read_json(data_question_match +
                                      'qid_abstractquestion.json')

    # All abstract questions that have at least one grounded training qid.
    abstractquestion_all = {
        qid_abstractquestions[qid]
        for qids in train_predicate_qids.values()
        for qid in qids if qid in qid_abstractquestions
    }

    trainset = []
    for k, (predicate, qids) in enumerate(train_predicate_qids.items()):
        print(k, predicate)

        same_abstractquestions = {
            qid_abstractquestions[qid]
            for qid in qids if qid in qid_abstractquestions
        }

        # Negative pool: abstract questions whose gold predicate differs.
        residu_abstractquestions = list(abstractquestion_all -
                                        same_abstractquestions)
        # Cap positives per predicate to keep the set balanced.
        same_abstractquestions = list(same_abstractquestions)[:10]

        for current in same_abstractquestions:
            for gold in same_abstractquestions:
                if current != gold:
                    random.shuffle(residu_abstractquestions)
                    trainset.append([current, gold, 1])
                    for neg in residu_abstractquestions[:50]:
                        trainset.append([current, neg, 0])

    write_json(trainset, data_question_match + 'trainset.json')
 def reverse(path):
     """Invert a json mapping of key -> list of values into value -> key.

     When a value appears under several keys, the last key encountered wins.
     """
     mapping = read_json(path)
     return {value: key for key, values in mapping.items() for value in values}
def generate_testset():
    """Pair every test abstract question with every train abstract question
    and persist the cross product as ``testset.json``.

    Reads: 2.1-grounded test structures, train_predicate_qids.json,
    qid_abstractquestion.json.
    """
    train_predicate_qids = read_json(data_question_match +
                                     'train_predicate_qids.json')
    qid_abstractquestions = read_json(data_question_match +
                                      'qid_abstractquestion.json')

    # Train-side abstract questions that have a grounded predicate.
    train_abstractquestion = {
        qid_abstractquestions[qid]
        for qids in train_predicate_qids.values()
        for qid in qids if qid in qid_abstractquestions
    }

    # Test-side abstract questions present in the 2.1 structures.
    test_abstractquestions = set()
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    for structure in test_2_1:
        key = 'test_' + str(structure.qid)
        if key in qid_abstractquestions:
            test_abstractquestions.add(qid_abstractquestions[key])

    testset = [[test_question, train_question]
               for test_question in test_abstractquestions
               for train_question in train_abstractquestion]
    write_json(testset, data_question_match + 'testset.json')
Example #6
0
    def __init__(self):
        """Load the train grounded graphs plus the precomputed BERT
        question-match results for the dataset selected by the module-level
        ``mode`` flag ('cwq', 'lcquad' or 'webq').

        Sets: self.train_qid_to_grounded_graph_dict,
        self.testqid_trainqid_bertmax,
        self.testqid_correspondingtrainqid_denotations.
        """
        if mode == 'cwq':
            self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
                globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir)
            self.testqid_trainqid_bertmax = read_json(
                fn_cwq_file.question_match_dir +
                'testqid_trainqid_bertmax.json')
            self.testqid_correspondingtrainqid_denotations = read_json(
                fn_cwq_file.question_match_dir +
                'testqid_correspondingtrainqid_denotations.json')

        elif mode == 'lcquad':
            self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_dbpedia(
                fn_lcquad_file.lcquad_train_bgp_dir)
            self.testqid_trainqid_bertmax = read_json(
                fn_lcquad_file.question_match_dir +
                'testqid_trainqid_bertmax.json')
            self.testqid_correspondingtrainqid_denotations = read_json(
                fn_lcquad_file.question_match_dir +
                'testqid_correspondingtrainqid_denotations.json')

        elif mode == 'webq':
            resources_webq = root + '/resources_webq/'
            data_question_match = resources_webq + 'data_question_match/'
            # BUG FIX: the original path mixed '\' and '/' separators
            # ('\dataset_questions\webquestions/...'); backslashes break on
            # POSIX and '\d' / '\w' are invalid escape sequences. Use '/'
            # like every other path in this file.
            train_webq_bgp_filepath = root + '/dataset_questions/webquestions/2019.06.04_wsp_train_bgp.txt'
            self.train_qid_to_grounded_graph_dict = extract_grounded_graph_from_jena_freebase(
                train_webq_bgp_filepath)
            self.testqid_trainqid_bertmax = read_json(
                data_question_match + 'testqid_trainqid_bertmax.json')
            self.testqid_correspondingtrainqid_denotations = read_json(
                data_question_match +
                'testqid_correspondingtrainqid_denotations.json')
def _convert_file_to_oracle_graphs(file_path,
                                   question_type,
                                   entities_or_literals,
                                   is_constraint_mediator=False):
    """Parse one precomputed oracle candidate file into grounded graphs.

    Dispatches on *question_type* ('composition', 'conjunction', 'ask') and
    on ``grounding_args.q_mode`` (freebase parsers for 'cwq'/'graphq',
    dbpedia parsers for 'lcquad').  Unmatched combinations fall through and
    yield an empty candidate list.

    :param file_path: file name under ``grounding_args.oracle_file_root``.
    :param question_type: 'composition' | 'conjunction' | 'ask'.
    :param entities_or_literals: list of (value, type) anchor pairs; the
        first pair is used for composition, the first two for
        conjunction/ask.
    :param is_constraint_mediator: forwarded to the composition parsers only.
    :return: result of ``grounding_utils.candidate_query_to_grounded_graph``.
    """
    candidate_graphquerys = []
    data_dict = read_json(grounding_args.oracle_file_root + file_path)
    if question_type == 'composition':
        if grounding_args.q_mode in ['cwq', 'graphq']:
            candidate_graphquerys = path_to_graph_bgp.parser_composition_q_freebase_sp(
                data_dict=data_dict,
                s1=entities_or_literals[0][0],
                t1=entities_or_literals[0][1],
                is_constraint_mediator=is_constraint_mediator)
        elif grounding_args.q_mode in ['lcquad']:
            candidate_graphquerys = path_to_graph_bgp.parser_composition_q_dbpedia_sp(
                data_dict=data_dict,
                s1=entities_or_literals[0][0],
                t1=entities_or_literals[0][1],
                is_constraint_mediator=is_constraint_mediator)
    elif question_type == 'conjunction':
        if grounding_args.q_mode in ['cwq', 'graphq']:
            candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_freebase(
                data_dict=data_dict,
                s1=entities_or_literals[0][0],
                t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0],
                t2=entities_or_literals[1][1])
        elif grounding_args.q_mode in ['lcquad']:
            candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                data_dict=data_dict,
                s1=entities_or_literals[0][0],
                t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0],
                t2=entities_or_literals[1][1])
    elif question_type == 'ask':
        # 'ask' (yes/no) questions are only handled for the dbpedia backend.
        if grounding_args.q_mode in ['lcquad']:
            candidate_graphquerys = path_to_graph_bgp.parser_yesno_q_dbpedia(
                data_dict=data_dict,
                s1=entities_or_literals[0][0],
                t1=entities_or_literals[0][1],
                s2=entities_or_literals[1][0],
                t2=entities_or_literals[1][1])

    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
def _get_oracle_graphs_comparative(_2_1_grounded_graph):
    """Ground a comparative question from precomputed oracle files.

    For each entity anchor node in the 2.1 graph, loads the matching
    'comparative_entity_<id>' oracle file (if present) and parses it into
    candidate graph queries; literal nodes carry no oracle files here.
    """
    entity_anchor_ids = [
        node.id for node in _2_1_grounded_graph.nodes
        if node.node_type == 'entity'
    ]

    candidate_graphquerys = []
    for entity_id in entity_anchor_ids:
        print('#anchor:\t', entity_id)
        oracle_name = 'comparative_entity_' + entity_id
        if oracle_name not in grounding_args.oracle_all_files_path_names:
            continue
        data_dict = read_json(grounding_args.oracle_file_root + oracle_name)
        candidate_graphquerys.extend(
            path_to_graph_comparative.parser_comparative_q_freebase_ir(
                data_dict=data_dict, s1=entity_id, t1='entity'))

    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
def investigate_denotation_same():
    """Walk all 2.2 structure files, compute freebase denotations for every
    test question that has a BERT-matched train question, and persist the
    accumulated test-qid -> denotations cache.

    Side effects: prints per-question denotations; writes
    ``testqid_correspondingtrainqid_denotations.json``.
    """
    testqid_trainqid_bertmax = read_json(data_question_match +
                                         'testqid_trainqid_bertmax.json')
    qmi = QuestionMatchInterface()
    structure_2_2_files = '/2.2_test_span_transfer_wo_wordlevel/'
    all_data_path = os.listdir(output_path + structure_2_2_files)
    for path in all_data_path:
        print(path)
        # File names are '<qid>.<ext>'; keys in the bertmax dict are
        # prefixed with 'test_'.
        test_qid = path.split('.')[0]
        test_qid = 'test_' + str(test_qid)
        # if 'test_'+str(test_qid) not in testqid_trainqid_bertmax:
        if test_qid not in testqid_trainqid_bertmax:
            continue
        # structure_with_grounded_graphq_file = output_path + structure_2_2_files + path
        structure_list = read_structure_file(output_path +
                                             structure_2_2_files + path)
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                nodes = []
                # Only the first grounded graph's nodes are used.
                for groundedgraph in ungrounded_graph.get_grounded_graph_forest(
                ):
                    nodes = groundedgraph.nodes
                    break
                # print(test_qid)
                # denotation = set(qmi.get_denotation_by_testqid_nodes(test_qid, nodes))
                denotation = set(
                    qmi.get_denotation_by_testqid_nodes_freebase(
                        test_qid, nodes))
                print('denotations:', denotation)
                # gold_mids = set()
                # for one in structure.gold_answer:
                #     gold_mids.add(one['answer_id'])
                #
                # if  (len(denotation-gold_mids)==0 and len(gold_mids-denotation)==0):
                #     print('oh no',test_qid)
                #     if test_qid in qmunique_qids:
                #         print('double oh no')
    # Persist the cache that QuestionMatchInterface filled in as a side
    # effect of the denotation lookups above.
    write_json(
        qmi.testqid_correspondingtrainqid_denotations,
        data_question_match + 'testqid_correspondingtrainqid_denotations.json')
def _get_2_2_graphs_by_type_and_literals(question_type,
                                         entities_or_literals,
                                         is_constraint_mediator=False):
    """Ground a question whose anchors include a literal, using precomputed
    oracle files named after the anchor values.

    Two shapes are handled: two anchors (one entity + one literal, treated
    as a conjunction) and a single literal anchor (treated as a
    composition).  The literal is normalized via
    ``grounding_utils.literal_postprocess`` and mapped to its id with
    ``grounding_args.literal_to_id_map``; unknown literals return [].

    :return: grounded graphs from
        ``grounding_utils.candidate_query_to_grounded_graph`` (an empty
        candidate list if no oracle file / parser combination matched).
    """
    candidate_graphquerys = []
    if len(entities_or_literals) == 2:  # one literal, one entity
        literal_value = None
        entity_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
            else:
                entity_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)

        literal_value = grounding_utils.literal_postprocess(
            literal_value, q_mode=grounding_args.q_mode)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return []

        # Oracle files may be named with either anchor order.
        filename_1 = question_type + '_entity_' + entity_value + '_literal_' + literal_value_id
        filename_2 = question_type + '_literal_' + literal_value_id + '_entity_' + entity_value

        if filename_1 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'conjunction':
                if grounding_args.q_mode in ['cwq', 'graphq']:
                    candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_freebase(
                        data_dict=data_dict,
                        s1=entity_value,
                        t1='entity',
                        s2=literal_value_id,
                        t2='literal')
                elif grounding_args.q_mode in ['lcquad']:
                    candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                        data_dict=data_dict,
                        s1=entity_value,
                        t1='entity',
                        s2=literal_value_id,
                        t2='literal')

        # NOTE(review): unlike the filename_1 branch above, this branch does
        # not check question_type == 'conjunction' before calling the
        # conjunction parsers — confirm whether non-conjunction types can
        # reach here.
        elif filename_2 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_2)
            if grounding_args.q_mode in ['cwq', 'graphq']:
                candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_freebase(
                    data_dict=data_dict,
                    s1=literal_value_id,
                    t1='literal',
                    s2=entity_value,
                    t2='entity')
            elif grounding_args.q_mode in ['lcquad']:
                candidate_graphquerys = path_to_graph_bgp.parser_conjunction_q_dbpedia(
                    data_dict=data_dict,
                    s1=literal_value_id,
                    t1='literal',
                    s2=entity_value,
                    t2='entity')

    elif len(entities_or_literals) == 1:
        literal_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)

        literal_value = grounding_utils.literal_postprocess(
            literal_value, q_mode=grounding_args.q_mode)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return []

        filename_1 = question_type + '_literal_' + literal_value_id
        if filename_1 in grounding_args.oracle_all_files_path_names:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'composition':
                if grounding_args.q_mode in ['cwq', 'graphq']:
                    candidate_graphquerys = path_to_graph_bgp.parser_composition_q_freebase_sp(
                        data_dict=data_dict,
                        s1=literal_value,
                        t1='literal',
                        is_constraint_mediator=is_constraint_mediator)
                elif grounding_args.q_mode in ['lcquad']:
                    candidate_graphquerys = path_to_graph_bgp.parser_composition_q_dbpedia_sp(
                        data_dict=data_dict,
                        s1=literal_value,
                        t1='literal',
                        is_constraint_mediator=is_constraint_mediator)

    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
                GroundedEdge(start=edge["start"],
                             end=edge["end"],
                             relation=edge["relation"],
                             friendly_name=edge["friendly_name"],
                             score=1.0))
        graphq.sparql_query = questionAnnotation["sparql_query"]
        graphq.parsed_sparql = questionAnnotation['parsed_sparql']
        graphquestionsList.append(graphq)
    return graphquestionsList


# GraphQuestions test/train splits parsed into structure objects, loaded
# once at import time.
test_graph_questions_struct = read_graph_question_json(
    globals_args.fn_graph_file.graphquestions_testing_dir)
train_graph_questions_struct = read_graph_question_json(
    globals_args.fn_graph_file.graphquestions_training_dir)
# Node-level annotations for the GraphQuestions questions (raw json).
annotation_node_questions_json = read_json(
    globals_args.fn_graph_file.graphquestions_node_ann_dir)


def get_answers_by_question(question=None):
    """Return the gold answers for *question*, or [] if it is unknown.

    Both splits are searched; a train-split match takes precedence over a
    test-split match (the train loop runs second and overwrites).
    """
    answers = []
    for data_ann in test_graph_questions_struct:
        if data_ann.question == question:
            answers = data_ann.answer
            break
    for data_ann in train_graph_questions_struct:
        if data_ann.question == question:
            answers = data_ann.answer
            # BUG FIX: the original omitted this break, so it kept scanning
            # and returned the *last* train match; stop at the first match,
            # consistent with the test loop above.
            break
    return answers


def get_answers_mid_by_question(question=None):
def generate_predicate_qids():
    """Group train/test qids by their sorted predicate signature and persist
    both mappings; finally print how many test questions share a signature
    with some train question.

    Reads: the cwq train/test bgp files and qid_abstractquestion.json.
    Writes: train_predicate_qids.json and test_predicate_qids.json.
    """
    train_qid_to_grounded_graph_dict = questions_utils.extract_grounded_graph_from_jena_freebase(
        train_cwq_bgp_filepath)
    test_qid_to_grounded_graph_dict = questions_utils.extract_grounded_graph_from_jena_freebase(
        test_cwq_bgp_filepath)
    qid_abstractquestions = read_json(data_question_match +
                                      'qid_abstractquestion.json')

    # The train and test passes were verbatim duplicates; share one helper.
    train_predicate_qids = _group_qids_by_predicate(
        train_qid_to_grounded_graph_dict, 'train_', qid_abstractquestions)
    write_json(train_predicate_qids,
               data_question_match + 'train_predicate_qids.json')

    test_predicate_qids = _group_qids_by_predicate(
        test_qid_to_grounded_graph_dict, 'test_', qid_abstractquestions)
    write_json(test_predicate_qids,
               data_question_match + 'test_predicate_qids.json')

    # Count test questions whose predicate signature also occurs in train.
    num_intersect = 0
    for predicate in test_predicate_qids:
        if predicate in train_predicate_qids:
            num_intersect += len(test_predicate_qids[predicate])
    print(num_intersect)


def _group_qids_by_predicate(qid_to_grounded_graph_dict, qid_prefix,
                             qid_abstractquestions):
    """Map each tab-joined, sorted predicate signature to the (prefixed)
    qids that have a non-empty abstract question."""
    predicate_qids = collections.defaultdict(list)
    for qid, grounded_graph in qid_to_grounded_graph_dict.items():
        qid = qid_prefix + qid
        if qid not in qid_abstractquestions:
            continue
        # Sort so that signatures are order-independent across graphs.
        predicates = sorted(edge.friendly_name
                            for edge in grounded_graph.edges)
        predicate = '\t'.join(predicates)
        if len(qid_abstractquestions[qid]) > 0:
            predicate_qids[predicate].append(qid)
    return predicate_qids
def score_testquestion_bert():
    """Score every test question against all train questions with the
    precomputed BERT similarity log, and persist the ranked matches.

    Writes: test_qid_trainqid_pro_bert (full ranked dict per test qid) and
    testqid_trainqid_bertmax.json (best train qid per test qid).
    Prints 'yeah' whenever the best match shares the gold predicate.
    """
    # NOTE(review): duplicates the module-level reverse() helper above.
    def reverse(path):
        data = read_json(path)
        res = dict()
        for key in data:
            for val in data[key]:
                res[val] = key
        return res

    # def read_abstractquestionpair_pro():
    #     diction = dict()
    #     with open(data_question_match + '09_03_cwq_test_gpu.log', 'r') as f: #'05_10_test.log'
    #         mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    #         line = mm.readline()
    #         while line:
    #             cols = line.decode().strip().split('\t')
    #             abstractquestion_pair = '\t'.join([cols[0], cols[1]])
    #             if float(cols[3]) > 0:
    #                 diction[abstractquestion_pair] = float(cols[3])
    #             line = mm.readline()
    #     mm.close()
    #     f.close()
    #     return

    def read_abstractquestionpair_pro():
        """Parse the BERT scoring log into {'<q>\t<train_q>': score} for
        scores > 0.  Columns: cols[1]=test question, cols[2]=train question,
        cols[4]=score."""
        diction = dict()
        with open(data_question_match + '09_03_cwq_test_gpu.log',
                  'r') as f:  #'05_10_test.log'
            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            line = mm.readline()
            while line:
                cols = line.decode().strip().split('\t')
                abstractquestion_pair = '\t'.join([cols[1], cols[2]])
                if float(cols[4]) > 0:
                    diction[abstractquestion_pair] = float(cols[4])
                line = mm.readline()
        # NOTE(review): f.close() is redundant — the with-block already
        # closed the file.
        mm.close()
        f.close()
        return diction

    abstractquestionpair_pro = read_abstractquestionpair_pro()
    # print(abstractquestionpair_pro)
    testqid_trainqidmax = dict()
    test_qid_trainqid_pro = dict()
    qid_abstractquestion = read_json(data_question_match +
                                     'qid_abstractquestion.json')
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    train_2_1 = read_structure_file(
        train_structure_with_2_1_grounded_graph_file)
    # value -> key inversions: qid -> its predicate signature.
    test_qid_predicate = reverse(data_question_match +
                                 'test_predicate_qids.json')
    train_qid_predicate = reverse(data_question_match +
                                  'train_predicate_qids.json')

    for one in test_2_1:
        qid = 'test_' + str(one.qid)
        print(qid)
        if qid not in qid_abstractquestion:
            continue
        abstractquestion = qid_abstractquestion[qid]

        # Collect all train questions scored against this test question.
        trainqid_pro = dict()
        for train_one in train_2_1:
            train_one_qid = 'train_' + str(train_one.qid)
            if train_one_qid not in qid_abstractquestion:
                continue

            train_abstractquestion = qid_abstractquestion[train_one_qid]
            if '\t'.join([abstractquestion,
                          train_abstractquestion]) in abstractquestionpair_pro:
                # print('\t'.join([abstractquestion,train_abstractquestion]))
                sim = abstractquestionpair_pro[('\t'.join(
                    [abstractquestion, train_abstractquestion]))]
                trainqid_pro[train_one_qid] = float(sim)

        # Rank descending by similarity; dicts preserve insertion order.
        trainqid_pro = dict(
            sorted(trainqid_pro.items(), key=lambda d: d[1], reverse=True))
        if len(trainqid_pro) == 0:
            continue

        # Diagnostic: does the best match share the gold predicate?
        if qid in test_qid_predicate:
            if list(trainqid_pro.keys())[0] in train_qid_predicate:
                if test_qid_predicate[qid] == train_qid_predicate[list(
                        trainqid_pro.keys())[0]]:
                    print('yeah')

        test_qid_trainqid_pro[qid] = trainqid_pro
        if len(list(trainqid_pro.keys())) > 0:
            testqid_trainqidmax[qid] = list(trainqid_pro.keys())[0]
    # NOTE(review): this output name lacks a '.json' suffix — confirm
    # whether downstream readers expect 'test_qid_trainqid_pro_bert.json'.
    write_json(test_qid_trainqid_pro,
               data_question_match + 'test_qid_trainqid_pro_bert')
    write_json(testqid_trainqidmax,
               data_question_match + 'testqid_trainqid_bertmax.json')
from datasets_interface.virtuoso_interface import freebase_kb_interface
from common.hand_files import read_json, write_json
from common.globals_args import fn_cwq_file

# In-memory cache: mid -> {'answer_id', 'answer', 'aliases'} records,
# filled lazily by get_names() and persisted by write_cache_json().
mid_to_names_dict = read_json(fn_cwq_file.cache_mid_to_names)


def get_names(instance_str):
    """Return the name record for *instance_str*, querying Freebase on a
    cache miss and memoizing the result in mid_to_names_dict.

    The record has keys 'answer_id', 'answer' and 'aliases'.
    """
    if instance_str in mid_to_names_dict:
        return mid_to_names_dict[instance_str]

    record = dict()
    record['answer_id'] = instance_str
    if isinstance(instance_str, str):  # mid = 'm.02hwgbx'
        record['answer'] = list(freebase_kb_interface.get_names(instance_str))
        record['aliases'] = list(freebase_kb_interface.get_alias(instance_str))
    else:
        # Non-string values (e.g. numbers) name themselves.
        record['answer'] = [instance_str]
        record['aliases'] = [instance_str]
    mid_to_names_dict[instance_str] = record
    return record


def write_cache_json():
    """Persist the in-memory mid -> names cache back to its json file."""
    write_json(mid_to_names_dict, fn_cwq_file.cache_mid_to_names)


if __name__ == '__main__':
    cwq_prediction_test_json = read_json(
Example #15
0
def get_2_2_graphs_by_type_and_literals(question_type, entities_or_literals):
    """Ground a question with a literal anchor from precomputed oracle files
    (cwq/graphq variant of _get_2_2_graphs_by_type_and_literals).

    Two shapes are handled: two anchors (one entity + one literal, treated
    as a conjunction) and a single literal anchor (treated as a
    composition).  Unknown literals return an empty list.

    NOTE(review): early exits return the bare `result` list while the
    normal path returns the converter's output — confirm callers accept
    both shapes.
    """
    result = []
    candidate_graphquerys = []
    if len(entities_or_literals) == 2:  # one literal, one entity
        literal_value = None
        entity_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
            else:
                entity_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        # Literals are referenced by their id in oracle file names.
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return result
        # Oracle files may be named with either anchor order.
        filename_1 = question_type
        filename_1 += '_entity_' + entity_value
        filename_1 += '_literal_' + literal_value_id
        filename_2 = question_type
        filename_2 += '_literal_' + literal_value_id
        filename_2 += '_entity_' + entity_value
        if filename_1 in grounding_args.oracle_all_files_path_names:  #all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'conjunction':
                if grounding_args.q_mode == 'cwq':
                    candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                        data_dict=data_dict,
                        s1=entity_value,
                        t1='entity',
                        s2=literal_value_id,
                        t2='literal')
                elif grounding_args.q_mode == 'graphq':
                    candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                        data_dict=data_dict,
                        s1=entity_value,
                        t1='entity',
                        s2=literal_value_id,
                        t2='literal')
        # NOTE(review): unlike the filename_1 branch, this branch does not
        # check question_type == 'conjunction' before calling the
        # conjunction parsers — confirm non-conjunction types cannot reach
        # here.
        elif filename_2 is not None and filename_2 in grounding_args.oracle_all_files_path_names:  #all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root +
                                  filename_2)  #file_result
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                    data_dict=data_dict,
                    s1=literal_value_id,
                    t1='literal',
                    s2=entity_value,
                    t2='entity')
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                    data_dict=data_dict,
                    s1=literal_value_id,
                    t1='literal',
                    s2=entity_value,
                    t2='entity')
    elif len(entities_or_literals) == 1:
        literal_value = None
        for entity_or_literal in entities_or_literals:
            if entity_or_literal[1] == 'literal':
                literal_value = entity_or_literal[0]
        if not isinstance(literal_value, str):
            literal_value = str(literal_value)
        if literal_value in grounding_args.literal_to_id_map:
            literal_value_id = str(
                grounding_args.literal_to_id_map[literal_value])
        else:
            return result
        filename_1 = question_type
        filename_1 += '_literal_' + literal_value_id
        if filename_1 in grounding_args.oracle_all_files_path_names:  # all_oracle_files_path:
            data_dict = read_json(grounding_args.oracle_file_root + filename_1)
            if question_type == 'composition':
                if grounding_args.q_mode == 'cwq':
                    candidate_graphquerys = path_to_graph.parser_composition_q_cwq_(
                        data_dict=data_dict, s1=literal_value, t1='literal')
                elif grounding_args.q_mode == 'graphq':
                    candidate_graphquerys = path_to_graph.parser_composition_q_graphq(
                        data_dict=data_dict, s1=literal_value, t1='literal')
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
Example #16
0
def _get_2_2_graphs_by_structure_and_type_only_entities(
        question_type=None,
        entities_or_literals=None,
        _2_1_graph=None,
        constraint='0'):
    """Ground a 2_2 structure whose anchors are entities (no literals).

    The oracle path file is looked up by a name built from the question
    type plus each anchor's ``_<type>_<value>`` fragment; for two
    anchors the reversed anchor order is tried as a fallback.  The file
    content is parsed into candidate graph queries according to the
    dataset mode (``cwq`` / ``graphq``) and converted to grounded graphs.

    :param question_type: 'composition' or 'conjunction'
    :param entities_or_literals: list of (value, type) anchor pairs
    :param _2_1_graph: unused here; kept for a uniform call signature
    :param constraint: '1' enables constraint parsing for composition
    :return: list of grounded graphs (empty when no oracle file matches)
    """
    def _fragment(anchor):
        # oracle file name fragment for one anchor: "_<type>_<value>"
        return '_' + anchor[1] + '_' + anchor[0]

    forward_name = question_type
    reverse_name = None
    if len(entities_or_literals) == 1:
        forward_name += _fragment(entities_or_literals[0])
    elif len(entities_or_literals) == 2:
        forward_name += _fragment(entities_or_literals[0])
        forward_name += _fragment(entities_or_literals[1])
        # fallback file name with the two anchors swapped
        reverse_name = (question_type
                        + _fragment(entities_or_literals[1])
                        + _fragment(entities_or_literals[0]))
    candidate_graphquerys = []
    if forward_name in grounding_args.oracle_all_files_path_names:
        data_dict = read_json(grounding_args.oracle_file_root + forward_name)
        s1, t1 = entities_or_literals[0][0], entities_or_literals[0][1]
        if question_type == 'composition':
            is_constraint = constraint == '1'
            # NOTE: keyword is spelled 'constaint' to match the callee's
            # parameter name.
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_composition_q_cwq_(
                    data_dict=data_dict, s1=s1, t1=t1, constaint=is_constraint)
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_composition_q_graphq(
                    data_dict=data_dict, s1=s1, t1=t1, constaint=is_constraint)
        elif question_type == 'conjunction':
            s2, t2 = entities_or_literals[1][0], entities_or_literals[1][1]
            if grounding_args.q_mode == 'cwq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                    data_dict=data_dict, s1=s1, t1=t1, s2=s2, t2=t2)
            elif grounding_args.q_mode == 'graphq':
                candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                    data_dict=data_dict, s1=s1, t1=t1, s2=s2, t2=t2)
    elif reverse_name is not None and reverse_name in grounding_args.oracle_all_files_path_names:
        # Reversed-order file only exists for two-anchor (conjunction) cases.
        data_dict = read_json(grounding_args.oracle_file_root + reverse_name)
        s1, t1 = entities_or_literals[1][0], entities_or_literals[1][1]
        s2, t2 = entities_or_literals[0][0], entities_or_literals[0][1]
        if grounding_args.q_mode == 'cwq':
            candidate_graphquerys = path_to_graph.parser_conjunction_q_cwq_(
                data_dict=data_dict, s1=s1, t1=t1, s2=s2, t2=t2)
        elif grounding_args.q_mode == 'graphq':
            candidate_graphquerys = path_to_graph.parser_conjunction_q_graphq(
                data_dict=data_dict, s1=s1, t1=t1, s2=s2, t2=t2)
    return grounding_utils.candidate_query_to_grounded_graph(
        candidate_graphquerys=candidate_graphquerys)
# ComplexWebQuestions splits, loaded once at import time from their
# annotated question JSON files.
all_complexwebq_list = read_complexwebq_question_json(
    globals_args.fn_cwq_file.complexwebquestion_all_questions_dir)
complexwebq_test_list = read_complexwebq_question_json(
    globals_args.fn_cwq_file.complexwebquestion_test_dir)
complexwebq_dev_list = read_complexwebq_question_json(
    globals_args.fn_cwq_file.complexwebquestion_dev_dir)
complexwebq_train_list = read_complexwebq_question_json(
    globals_args.fn_cwq_file.complexwebquestion_train_dir)

# qid -> grounded graph mappings extracted from the Jena/Freebase BGP
# annotation directories, one dict per split.
bgp_test_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase(
    globals_args.fn_cwq_file.complexwebquestion_test_bgp_dir)
bgp_dev_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase(
    globals_args.fn_cwq_file.complexwebquestion_dev_bgp_dir)
bgp_train_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_freebase(
    globals_args.fn_cwq_file.complexwebquestion_train_bgp_dir)
# Node-level annotations for all questions (entity/answer annotations).
annotation_node_questions_json = read_json(
    globals_args.fn_cwq_file.complexwebquestion_node_ann_dir)


def get_answers_by_question(question_normal=None):
    '''
    "answers": [
            {
                "answer": "Super Bowl XLVII",
                "answer_id": "m.0642vqv",
                "aliases": [
                    "Super Bowl 2013",
                    "Super Bowl 47"
                ]
            }
        ],
    :param question_normal:
Exemple #18
0
        lcquad_annotation.verbalized_question = question_ann[
            'intermediary_question']
        lcquad_annotation.qid = question_ann['_id']
        lcquad_list.append(lcquad_annotation)
    return lcquad_list


# LC-QuAD train/test question lists, loaded once at import time.
lcquad_test_list = read_train_test_data(
    filepath=fn_lcquad_file.lcquad_test_dir)
lcquad_train_list = read_train_test_data(
    filepath=fn_lcquad_file.lcquad_train_dir)
# qid -> grounded graph mappings extracted from the Jena/DBpedia BGP
# annotation directories, one dict per split.
bgp_test_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_dbpedia(
    fn_lcquad_file.lcquad_test_bgp_dir)
bgp_train_qid_to_graphs_dict = questions_utils.extract_grounded_graph_from_jena_dbpedia(
    fn_lcquad_file.lcquad_train_bgp_dir)
# Node/answer annotations for every question; consumed by the
# question-lookup helpers below.
annotation_node_answers_all_questions_json = read_json(
    fn_lcquad_file.lcquad_all_q_node_ann_dir)


def get_answers_by_question(question=None):
    """Return the annotated answer list for *question*.

    Scans every node-annotation record; when several records share the
    same normalized question text the answers of the LAST match win
    (mirroring the original scan-to-end behaviour).  Returns an empty
    list when no record matches.
    """
    matches = [
        record['answers']
        for record in annotation_node_answers_all_questions_json
        if record['question_normal'] == question
    ]
    return matches[-1] if matches else []


def get_type_by_question(question=None):
    question_type = 'bgp'
    for data_ann in annotation_node_answers_all_questions_json:
        if data_ann['question_normal'] == question:
            question_type = data_ann['type']
Exemple #19
0
from datasets_interface.virtuoso_interface import freebase_kb_interface
from common.hand_files import read_json, write_json
from common.globals_args import fn_graph_file

# Disk-backed lookup cache: mid -> {'answer_id', 'answer', 'aliases'}.
# Loaded once at import time; new entries are added by get_names() and
# persisted again by write_cache_json().
mid_to_names_dict = read_json(fn_graph_file.cache_mid_to_names)


def get_names(instance_str):
    """Resolve a Freebase mid (or literal value) to names and aliases.

    Results are memoized in the module-level ``mid_to_names_dict``
    cache.  For a string mid the knowledge-base interface is queried
    for labels and aliases; a non-string value is simply echoed back
    as its own name and alias.

    :param instance_str: a mid such as 'm.02hwgbx', or a literal value
    :return: dict with keys 'answer_id', 'answer' and 'aliases'
    """
    # Fast path: already resolved earlier in this run (or loaded from disk).
    if instance_str in mid_to_names_dict:
        return mid_to_names_dict[instance_str]
    entry = dict()
    entry['answer_id'] = instance_str
    if isinstance(instance_str, str):  # a mid, e.g. 'm.02hwgbx'
        labels = freebase_kb_interface.get_names(instance_str)
        print('#labels:\t', instance_str, labels)
        entry['answer'] = list(labels)
        alias = freebase_kb_interface.get_alias(instance_str)
        print('#alias:\t', instance_str, alias)
        entry['aliases'] = list(alias)
    else:
        # Non-string literal: no KB lookup possible, use the value itself.
        entry['answer'] = [instance_str]
        entry['aliases'] = [instance_str]
    mid_to_names_dict[instance_str] = entry
    return entry


def write_cache_json():
    """Persist the in-memory mid->names cache back to its JSON file."""
    write_json(mid_to_names_dict, fn_graph_file.cache_mid_to_names)