Beispiel #1
0
def run_grounded_node_grounding_freebase(structure_with_ungrounded_graphq_file,
                                         output_file):
    '''
    Step 2.1: turn 1.0 ungrounded queries into 2.1 grounded queries.

    For every structure, only the last graph of its ungrounded graph forest
    is processed: each of its nodes is linked through the Freebase node
    linking interface, grounded graphs are generated from those links, and
    both the links and the grounded graphs are stored on the ungrounded
    graph.

    :param structure_with_ungrounded_graphq_file: structure file to read
    :param output_file: path the updated structure list is written to
    :return: None; the result is written to output_file
    '''
    from grounding._2_1_grounded_graph import node_linking_interface_freebase
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structures = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structures:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_position = len(forest) - 1
        for position, ungrounded_graph in enumerate(forest):
            if position != last_position:
                continue
            # One (node, linking result) pair per node of the graph.
            node_links = [
                (node,
                 node_linking_interface_freebase.node_linking(
                     qid=structure.qid, node=node))
                for node in ungrounded_graph.nodes
            ]
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=node_links)
            ungrounded_graph.set_grounded_linking(node_links)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structures, output_file)
def compute_all_questions_recall(input_file):
    '''
    Print the oracle recall over a directory of structure files.

    For each file, the highest f1_score found among all of its grounded
    graphs is added to a running total. Files whose best score is not 1.0
    are printed together with the last question read from them. A file that
    raises while being processed is printed and collected in an error list.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :return: None; the total and the number of files are printed
    '''
    file_names = os.listdir(input_file)
    recall_sum = 0
    failed_files = []
    for file_name in file_names:
        try:
            best_f1 = 0
            last_question = None
            for structure in read_structure_file(input_file + file_name):
                last_question = structure.question
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    candidates = ungrounded_graph.get_grounded_graph_forest()
                    for candidate in candidates:
                        if best_f1 < candidate.f1_score:
                            best_f1 = candidate.f1_score
            recall_sum += best_f1
            if best_f1 != 1.0:
                print(('%s\t%s\t%s') %
                      (file_name, last_question, str(best_f1)))
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(e)
            failed_files.append(file_name)
    print('#error_list:\t', failed_files)
    print(recall_sum, len(file_names))
def generate_paths_graphq_interface_from_graph_2_1(
        structure_with_2_1_grounded_graph_file):
    '''
    Print the question type and entities of every 2.1 grounded graph.

    Each 2.1 grounded graph is converted to its entity list. A list of one
    entity is reported as 'composition', a list of two as 'conjunction';
    other sizes are not printed. The qid of any graph whose conversion
    raises is collected and printed at the end.

    :param structure_with_2_1_grounded_graph_file: structure file to read
    :return: None
    '''
    structures = read_structure_file(structure_with_2_1_grounded_graph_file)
    failed_qids = []

    # To process the input in batches, slice the list here,
    # e.g. structures = structures[0:1000].

    question_type_by_size = {1: 'composition', 2: 'conjunction'}
    for structure in structures:
        for ungrounded_graph in structure.ungrounded_graph_forest:
            graphs_2_1 = ungrounded_graph.get_grounded_graph_forest()
            for graph_2_1 in graphs_2_1:
                try:
                    entities = grounding_utils.convert_2_1_graph_to_qid_entities(
                        _2_1_graph=graph_2_1)
                    question_type = question_type_by_size.get(len(entities))
                    if question_type is not None:
                        print(('%s\t%s\t%s') %
                              (structure.qid, question_type, str(entities)))
                except Exception:
                    failed_qids.append(structure.qid)
    print('#error:\t', failed_qids)
Beispiel #4
0
def run_grounding_graph_question_match_minus(input_file_folder):
    '''
    Subtract the normalized path score from total_score of each candidate.

    For every structure file in the folder, the score of each grounded graph
    is collected and passed to utils.Normalize. Each grounded graph's
    total_score is then reduced by the normalized value of its own score,
    and the file is rewritten in place.

    :param input_file_folder: directory to list; file names are appended to
        this string by plain concatenation
    :return: None
    '''
    import os
    from common import utils
    for file_name in os.listdir(input_file_folder):
        print(file_name)
        structure_file = input_file_folder + file_name
        structures = read_structure_file(structure_file)

        # Gather every raw score of this file, in traversal order.
        raw_scores = []
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                raw_scores.extend(
                    grounded_graph.score for grounded_graph in
                    ungrounded_graph.get_grounded_graph_forest())

        normalized_scores = utils.Normalize(raw_scores)
        # Raw score -> normalized score; equal raw scores share one entry.
        normalized_by_raw = {
            raw: normalized_scores[position]
            for position, raw in enumerate(raw_scores)
        }

        # Step 4: total_score minus the normalized path score.
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    current_total = grounded_graph.total_score
                    grounded_graph.total_score = (
                        current_total - normalized_by_raw[grounded_graph.score])
        write_structure_file(structures, structure_file)
Beispiel #5
0
def run_grounded_node_grounding_dbpedia_gold(
        structure_with_ungrounded_graphq_file, output_file):
    '''
    Step 2.1: turn 1.0 ungrounded queries into 2.1 grounded queries.

    For every structure, only the last graph of its ungrounded graph forest
    is processed: each node is paired with the topic entities returned by
    the LC-QuAD 1.0 interface for the question and the node's friendly
    name, grounded graphs are generated from those pairs, and both are
    stored on the ungrounded graph.

    :param structure_with_ungrounded_graphq_file: structure file to read
    :param output_file: path the updated structure list is written to
    :return: None; the result is written to output_file
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    from grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structures = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structures:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_position = len(forest) - 1
        for position, ungrounded_graph in enumerate(forest):
            if position != last_position:
                continue
            # One (node, linking result) pair per node,
            # e.g. (node(barbaro), {'en.barbaro': 1.6}).
            node_links = [
                (node,
                 lcquad_1_0_interface.
                 get_topic_entities_list_by_question_and_nodemention(
                     question=structure.question,
                     mention=node.friendly_name))
                for node in ungrounded_graph.nodes
            ]
            grounded_graphs = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=node_links)
            ungrounded_graph.set_grounded_linking(node_links)
            ungrounded_graph.set_grounded_graph_forest(grounded_graphs)
    write_structure_file(structures, output_file)
def computed_every_grounded_graph_f1_cwq(input_file):
    '''
    Score every grounded graph of every structure file against CWQ answers.

    For each file in the directory, the gold answers of each structure's
    question are fetched from the ComplexWebQuestions interface and compared
    with the denotation of each grounded graph via sempre_evaluation.computeF1.
    The resulting f1, recall and precision are stored on the grounded graph
    and the file is rewritten in place.

    A file that raises is skipped: its name and the exception are printed,
    and the name is added to the error list printed at the end.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :return: None
    '''
    from datasets_interface.question_interface import complexwebquestion_interface
    error_list = []
    for structure_path in os.listdir(input_file):
        structure_with_grounded_graphq_file = input_file + structure_path
        print(structure_path)
        try:
            structure_list = read_structure_file(
                structure_with_grounded_graphq_file)
            for structure in structure_list:
                gold_answer_mid_set = complexwebquestion_interface.get_answers_by_question(
                    structure.question)
                for ungrounded_graph in structure.ungrounded_graph_forest:
                    for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                        system_denotation_set = set(grounded_graph.denotation)
                        recall, precision, f1 = sempre_evaluation.computeF1(
                            gold_answer_mid_set, system_denotation_set)
                        grounded_graph.f1_score = f1
                        grounded_graph.recall_score = recall
                        grounded_graph.precision_score = precision
            write_structure_file(structure_list,
                                 structure_with_grounded_graphq_file)
        except Exception as e:
            # Best effort per file, but report the cause so that failures
            # can be diagnosed instead of being reduced to a bare 'error'.
            print('error', structure_path, e)
            error_list.append(structure_path)
    print('error_list:\t', error_list)
def train_data_generation_samestructure_graphq(propertys, files,
                                               qid_abstractquestions):
    '''
    Build training examples of one gold path plus same-length negatives.

    For each structure file, the qid is the file's base name up to the first
    dot. Files whose abstract question is empty are skipped. The gold path
    is the tab-joined, sorted list of relations of the gold SPARQL edges.
    Negatives are key paths of grounded graphs that have the same number of
    relations as the gold path and differ from it, up to
    model_parameters.neg_size. When at least one such negative exists, the
    list is topped up to neg_size with random combinations drawn from
    propertys. Files with no negative are reported as 'not join'.

    :param propertys: sequence of relation names to sample from
    :param files: paths of the structure files to read
    :param qid_abstractquestions: mapping from qid to abstract question
    :return: None; the examples are written as JSON under
        fn_graph_file.path_match_dir
    '''
    training_examples = list()
    for file_index, file in enumerate(files):
        print(file_index, file)
        structures = read_structure_file(file)
        qid = file.split('/')[-1].split('.')[0]

        if len(qid_abstractquestions[qid]) == 0:
            continue

        negatives = list()
        negative_count = 0
        for structure in structures:
            predicates = [
                edge['relation']
                for edge in structure.gold_sparql_query['edges']
            ]
            gold_path = '\t'.join(sorted(predicates))
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.grounded_graph_forest:
                    relations = sorted(grounded_graph.key_path.split('\t'))
                    sorted_path = '\t'.join(relations)
                    if (negative_count < model_parameters.neg_size
                            and len(relations) == len(predicates)
                            and sorted_path != gold_path):
                        negatives.append(sorted_path)
                        negative_count += 1

        if negative_count == 0:
            print('not join', qid)
            continue

        # Top up with random paths; gold_path and predicates here are the
        # ones of the last structure read from the file.
        while negative_count < model_parameters.neg_size:
            sampled = [
                propertys[random.randint(0, len(propertys) - 1)]
                for _ in range(len(predicates))
            ]
            sampled.sort()
            random_path = "\t".join(sampled)
            if random_path != gold_path and random_path not in negatives:
                negatives.append(random_path)
                negative_count += 1

        training_examples.append({
            "qid": qid,
            "abstractquestion": (qid_abstractquestions[qid]),
            "gold_path": gold_path,
            "negatives": negatives,
        })
    write_json(
        training_examples, fn_graph_file.path_match_dir +
        "data_for_trainorval_list_samestructure.json")
def computed_every_grounded_graph_f1_graphq(input_file):
    '''
    Score every grounded graph of every structure file against gold mids.

    Gold answer mids are fetched from the GraphQuestions interface by
    question text. The distinct denotation values of each grounded graph
    (int values converted to str) are compared with them via
    sempre_evaluation.computeF1, and f1, recall and precision are stored on
    the grounded graph. Each structure's gold_answer is replaced by the gold
    mids, and the file is rewritten in place.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :return: None
    '''
    from datasets_interface.question_interface import graphquestion_interface
    for file_name in os.listdir(input_file):
        structure_file = input_file + file_name
        print(file_name)
        structures = read_structure_file(structure_file)
        for structure in structures:
            gold_mids = graphquestion_interface.get_answers_mid_by_question(
                structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_answers = [
                        str(answer) if isinstance(answer, int) else answer
                        for answer in set(grounded_graph.denotation)
                    ]
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_mids, system_answers)
                    grounded_graph.f1_score = f1
                    grounded_graph.recall_score = recall
                    grounded_graph.precision_score = precision
                    if f1 > 0:
                        print(file_name, f1)
            # Replace the answers by the answer mid list,
            # e.g. ["Kimberly-Clark"] becomes ['en.kimberly-clark'].
            structure.gold_answer = gold_mids
        write_structure_file(structures, structure_file)
Beispiel #9
0
def run_grounding_graph_path_match(input_file_folder):
    '''
    Score every candidate grounded graph with the lexical path matcher.

    For each ungrounded graph, the important words of the question are
    extracted and each grounded graph's score is set to
    PathMatchByLexicalNN.get_path_pro(key_path, important words). Every
    structure file in the folder is rewritten in place.

    :param input_file_folder: directory to list; file names are appended to
        this string by plain concatenation
    :return: None
    '''
    from grounding.ranking.path_match_nn.path_match_interface import PathMatchByLexicalNN
    import os
    from parsing.parsing_utils import extract_importantwords_from_question
    file_names = os.listdir(input_file_folder)
    path_matcher = PathMatchByLexicalNN()
    for file_name in file_names:
        print(file_name)
        structure_file = input_file_folder + file_name
        structures = read_structure_file(structure_file)
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                important_words = extract_importantwords_from_question(
                    question=structure.question,
                    ungrounded_graph=ungrounded_graph)
                candidates = ungrounded_graph.get_grounded_graph_forest()
                print(important_words, len(candidates))
                for candidate in candidates:
                    candidate.score = path_matcher.get_path_pro(
                        candidate.key_path, important_words)
                    print(candidate.key_path, important_words,
                          candidate.score)
        write_structure_file(structures, structure_file)
Beispiel #10
0
def run_grounded_graph_generation_by_structure_transformation(
        structure_with_grounded_graphq_node_grounding_file, output_file):
    '''
    Expand 2.1 grounded graphs into candidate grounded graphs, one output
    file per structure.

    For every ungrounded graph, each of its 2.1 grounded graphs is passed to
    graph_2_1_to_2_2_by_transfer and the returned candidates are collected.
    Each candidate gets a grounded_query_id derived from the ungrounded
    query id and its position, and a SPARQL query; for structures whose
    function is 'count' the denotation is replaced by its size. A structure
    is written to '<output_file><qid>.json' only when at least one of its
    ungrounded graphs produced candidates.

    Structures whose output file already exists are skipped, so an
    interrupted run can be resumed. A 2.1 graph whose expansion raises is
    reported and its qid collected; processing continues.

    :param structure_with_grounded_graphq_node_grounding_file: structure
        file with 2.1 grounded graphs to read
    :param output_file: output directory prefix; '<qid>.json' is appended to
        this string by plain concatenation
    :return: None
    '''
    from grounding._2_2_grounded_graph_offline import graph_2_1_to_2_2_by_transfer
    from grounding.grounded_graph_to_sparql import grounded_graph_to_sparql_CWQ

    def count_denotation_to_num(grounded_graph):
        '''
        Return the denotation size wrapped in a list, [0] when it is None.

        Used for counting questions such as
        "how many softwares are developed by google?".
        '''
        denotation_set = grounded_graph.denotation
        return [0 if denotation_set is None else len(denotation_set)]

    structure_list = read_structure_file(
        structure_with_grounded_graphq_node_grounding_file)
    error_qid_list = []
    for i, structure in enumerate(structure_list):
        structure_output_path = output_file + str(structure.qid) + '.json'
        # Check the exact path that is written below instead of listing the
        # whole output directory once per structure.
        if os.path.exists(structure_output_path):
            continue
        print(i, structure.qid, structure.question)
        has_candidates = False
        for ungrounded_graph in structure.ungrounded_graph_forest:
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    grounded_graph_forest.extend(
                        graph_2_1_to_2_2_by_transfer.
                        generate_candidates_by_2_1_grounded_graph_interface(
                            _2_1_grounded_graph=_2_1_grounded_graph))
                except Exception as e:
                    print('#Error:', structure.qid, e)
                    error_qid_list.append(structure.qid)
            if grounded_graph_forest:
                has_candidates = True
                print('#Size:', len(grounded_graph_forest))
            for z, grounded_graph in enumerate(grounded_graph_forest):
                grounded_graph.grounded_query_id = (
                    ungrounded_graph.ungrounded_query_id * 100000 + z)
                grounded_graph.sparql_query = grounded_graph_to_sparql_CWQ(
                    grounded_graph)
                if structure.function == 'count':
                    grounded_graph.denotation = count_denotation_to_num(
                        grounded_graph)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
        if has_candidates:
            write_structure_file([structure], structure_output_path)
    print('Error qid list:', error_qid_list)
def generate_paths_graphq_interface_from_graph_2_1_graphq(
        structure_with_2_1_grounded_graph_file):
    '''
    Print the 2.1 grounded graphs that have no matching oracle file name.

    Each 2.1 grounded graph is converted to its entity list. One entity is
    treated as 'composition', two as 'conjunction'; other sizes are ignored.
    A line is printed for every graph whose derived file name is not in
    grounding_args.oracle_all_files_path_names.

    :param structure_with_2_1_grounded_graph_file: structure file to read
    :return: None
    '''
    def is_exist(question_type=None, entities_or_literals=None):
        '''
        Return 1 when a file name built from the question type and the
        entities is a known oracle file name, otherwise 0. For two entities
        both orders are tried.
        '''
        from grounding import grounding_args

        def name_part(item):
            return '_' + item[1] + '_' + item[0]

        forward_name = question_type
        backward_name = None
        if len(entities_or_literals) == 1:
            forward_name += name_part(entities_or_literals[0])
        elif len(entities_or_literals) == 2:
            first = entities_or_literals[0]
            second = entities_or_literals[1]
            forward_name += name_part(first) + name_part(second)
            backward_name = question_type + name_part(second) + name_part(first)
        known_names = grounding_args.oracle_all_files_path_names
        if forward_name in known_names:
            return 1
        if backward_name is not None and backward_name in known_names:
            return 1
        return 0

    structures = read_structure_file(structure_with_2_1_grounded_graph_file)
    # Never filled here (no error handling is active); printed as is.
    failed_qids = []
    question_type_by_size = {1: 'composition', 2: 'conjunction'}
    for structure in structures:
        for ungrounded_graph in structure.ungrounded_graph_forest:
            for graph_2_1 in ungrounded_graph.get_grounded_graph_forest():
                entities = grounding_utils.convert_2_1_graph_to_qid_entities(
                    _2_1_graph=graph_2_1)
                question_type = question_type_by_size.get(len(entities))
                if question_type is None:
                    continue
                found = is_exist(question_type=question_type,
                                 entities_or_literals=entities)
                if found == 0:
                    print(('%s\t%s\t%s\t%d') %
                          (structure.qid, question_type, str(entities), found))
    print('#error:\t', failed_qids)
Beispiel #12
0
def computed_every_grounded_graph_f1_webq_name(input_file, answer_file,
                                               mid_to_names_file):
    '''
    Score every grounded graph by comparing answer names, not mids.

    The denotation mids of each grounded graph are translated to names via
    the mid-to-names table, compared with the gold answer names of the
    structure's qid via sempre_evaluation.computeF1, and the resulting f1 is
    stored on the grounded graph. Each structure file is rewritten in place.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :param answer_file: tab-separated lines; column 0 is the qid and
        column 2 a Python expression holding the answers
    :param mid_to_names_file: tab-separated lines; column 1 is the mid and
        column 2 a Python expression holding an iterable of names
    :return: None
    '''
    # NOTE(review): both tables are parsed with eval(), which executes
    # whatever expression the files contain. Only use trusted files.

    # qid -> answers
    answers_by_qid = dict()
    for answer_line in read_list(answer_file):
        fields = answer_line.split('\t')
        answers_by_qid[fields[0]] = eval(fields[2])

    # mid -> names
    names_by_mid = dict()
    for names_line in read_list(mid_to_names_file):
        fields = names_line.split('\t')
        names_by_mid[fields[1]] = list(eval(fields[2]))

    for file_name in os.listdir(input_file):
        structure_file = input_file + file_name
        structures = read_structure_file(structure_file)
        for structure in structures:
            gold_names = evaluation_utils.search_for_answers_by_id(
                structure.qid, answers_by_qid)

            print(file_name, '#gold:\t', gold_names)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_names = set()
                    for mid in grounded_graph.denotation:
                        name = evaluation_utils.get_name_by_mid(
                            mid, names_by_mid)
                        print('###denotation:\t', mid, name)
                        if name is None:
                            # No name known for this mid; it is left out.
                            print(mid, '#####error!!!', name)
                            continue
                        system_names.add(name)
                    print('#gold:\t', gold_names, '#system:\t', system_names)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_names, system_names)
                    if f1 > 0.0:
                        print('#result:\t', f1)
                    grounded_graph.f1_score = f1
        write_structure_file(structures, structure_file)
def generate_qid_abstractquestion():
    '''
    Build and save a mapping from '<split>_<qid>' to the abstract question.

    The abstract question is the question text with the friendly name of
    every 'entity' node of the structure's first ungrounded graph replaced
    by '<e>'. Structures are read from the train and test 2.1 grounded graph
    files; the dev split is not used.

    :return: dict mapping '<split>_<qid>' to the abstract question; it is
        also written to 'qid_abstractquestion.json' under
        data_question_match
    '''
    test_structures = read_structure_file(
        test_structure_with_2_1_grounded_graph_file)
    train_structures = read_structure_file(
        train_structure_with_2_1_grounded_graph_file)
    abstract_by_qid = dict()
    splits = (('train', train_structures), ('test', test_structures))
    for split_name, structures in splits:
        for structure in structures:
            split_qid = split_name + "_" + str(structure.qid)
            original_question = structure.question
            for ungrounded_graph in structure.ungrounded_graph_forest:
                abstract_question = original_question
                for node in ungrounded_graph.nodes:
                    if node.node_type != 'entity':
                        continue
                    abstract_question = abstract_question.replace(
                        node.friendly_name, '<e>')
                abstract_by_qid[split_qid] = abstract_question
                # Only the first ungrounded graph is used.
                break
    write_json(abstract_by_qid,
               data_question_match + 'qid_abstractquestion.json')
    return abstract_by_qid
Beispiel #14
0
def computed_every_grounded_graph_f1_graphq(input_file):
    '''
    Score every grounded graph of every structure file against gold mids.

    Gold answer mids are looked up by qid, first in the test dictionary and
    then in the train dictionary of grounding_args; an unknown qid yields no
    gold answers. Gold and system answers are de-duplicated and int values
    are converted to str before sempre_evaluation.computeF1 is called. The
    f1 is stored on the grounded graph, each structure's gold_answer is
    replaced by the gold mids, and the file is rewritten in place.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :return: None
    '''
    from grounding.grounding_args import test_qid_to_answers_mid_dict, train_qid_to_answers_mid_dict
    for file_name in os.listdir(input_file):
        structure_file = input_file + file_name
        print(file_name)
        structures = read_structure_file(structure_file)

        for structure in structures:
            qid = structure.qid
            if qid in test_qid_to_answers_mid_dict:
                raw_gold_answers = test_qid_to_answers_mid_dict[qid]
            elif qid in train_qid_to_answers_mid_dict:
                raw_gold_answers = train_qid_to_answers_mid_dict[qid]
            else:
                raw_gold_answers = []

            # Normalize ints to strings, e.g. [80] -> ['80'].
            gold_answers = list({
                str(answer) if isinstance(answer, int) else answer
                for answer in raw_gold_answers
            })

            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                    system_answers = list({
                        str(answer) if isinstance(answer, int) else answer
                        for answer in set(grounded_graph.denotation)
                    })

                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, system_answers)
                    print(file_name, gold_answers, system_answers, f1)
                    grounded_graph.f1_score = f1
                    if f1 > 0:
                        print(f1)
            # Replace the answers by the answer mid list,
            # e.g. ["Kimberly-Clark"] becomes ['en.kimberly-clark'].
            structure.gold_answer = gold_answers
        write_structure_file(structures, structure_file)
Beispiel #15
0
def grounded_graphes_by_score_standard_ywsun_prediction_test(input_file):
    '''
    Write the top-ranked denotation of every structure file as a prediction.

    For each file, grounded graphs are grouped by total_score. The answer is
    the denotation of the first grounded query id recorded under the highest
    total_score, or an empty list when the file has no grounded graph. The
    prediction's ID is the qid of the last structure in the file.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :return: None; the predictions are written to
        './20191113_cwq_wo_wordlevel_prediction_test.json'
    '''
    from common.hand_files import write_json
    file_names = os.listdir(input_file)
    predictions = []
    for file_name in file_names:
        print(file_name)
        structures = read_structure_file(input_file + file_name)
        query_ids_by_total_score = collections.defaultdict(list)
        denotation_by_query_id = collections.defaultdict(set)
        qid = None
        for structure in structures:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    query_ids_by_total_score[candidate.total_score].append(
                        candidate.grounded_query_id)
                    denotation_by_query_id[
                        candidate.grounded_query_id] = candidate.denotation

        # Highest total score first; take the first query id of that group.
        ranked_groups = sorted(query_ids_by_total_score.items(),
                               key=lambda group: group[0],
                               reverse=True)
        answers = []
        if ranked_groups:
            top_query_ids = ranked_groups[0][1]
            if top_query_ids:
                answers = denotation_by_query_id[top_query_ids[0]]

        predictions.append({'ID': qid, 'answers_id': answers})
    write_json(predictions,
               './20191113_cwq_wo_wordlevel_prediction_test.json')
Beispiel #16
0
def generate_cwq_train_candidates_paths_from_structure(
        cwq_gold_path_list, train_candidates_sp_path_top_path, output_file):
    '''
    Attach candidate paths from the structure files to the CWQ gold paths.

    Entries are kept only when '<qid>.json' exists in the candidates
    directory and their 'gold' dict has a 'path' key. For each kept entry,
    the grounded graphs of the last ungrounded graph of the first structure
    in that file are converted to four hop path lists. Non-empty lists are
    stored under 'hop1' .. 'hop4' in 'gold'. When a path in
    'reverse_paths_list' equals one of the candidate paths, 'path' is
    replaced by it. 'reverse_paths_list' is then removed.

    :param cwq_gold_path_list: list of dicts with 'qid', 'question_normal'
        and 'gold'; the 'gold' dicts are modified in place
    :param train_candidates_sp_path_top_path: candidates directory;
        '<qid>.json' is appended to this string by plain concatenation
    :param output_file: file name appended to fn_cwq_file.score12_match
    :return: None; the result is written as JSON
    '''
    # Build the listing into a set once: it is queried once per question.
    candidate_files = set(os.listdir(train_candidates_sp_path_top_path))
    new_cwq_path_list = []
    for one in cwq_gold_path_list:
        print(one['qid'])
        candidate_file_name = str(one['qid']) + '.json'
        if candidate_file_name not in candidate_files:
            continue
        if 'path' not in one['gold']:
            continue
        # NOTE(review): 'gold' is the same dict object as in the input
        # entry, so the updates below also change cwq_gold_path_list.
        gold = one['gold']
        new_one = {
            'qid': one['qid'],
            'question_normal': one['question_normal'],
            'gold': gold,
        }
        candidates_structure = read_structure_file(
            train_candidates_sp_path_top_path + candidate_file_name)[0]
        ungrounded_graph = candidates_structure.ungrounded_graph_forest[-1]
        hop1, hop2, hop3, hop4 = score12_utils.grounded_graph_list_to_path_list(
            ungrounded_graph.get_grounded_graph_forest())
        hops = []
        for hop_key, hop_paths in (('hop1', hop1), ('hop2', hop2),
                                   ('hop3', hop3), ('hop4', hop4)):
            if len(hop_paths) > 0:
                gold[hop_key] = hop_paths
                hops += hop_paths
        goldpath = None
        for hop in hops:
            for temp_goldpath in gold['reverse_paths_list']:
                if score12_utils.eq_paths(temp_goldpath, hop):
                    goldpath = temp_goldpath
                    # NOTE(review): this leaves the inner loop only, so the
                    # match for the last matching hop wins. Kept as is;
                    # confirm whether the first match was intended.
                    break
        if goldpath is not None:
            gold['path'] = goldpath
        del gold['reverse_paths_list']
        new_cwq_path_list.append(new_one)
    write_json(new_cwq_path_list, fn_cwq_file.score12_match + output_file)
Beispiel #17
0
def show_f1_given_qids(input_file, qids):
    '''
    Write the best f1_score of selected structure files to 'qid_f1.json'.

    A file is selected when its name up to the first dot is in qids. Its
    value is the highest f1_score among all of its grounded graphs, or 0
    when none is higher.

    :param input_file: directory to list; file names are appended to this
        string by plain concatenation
    :param qids: container of file base names to include
    :return: None
    '''
    best_f1_by_qid = dict()
    for file_name in os.listdir(input_file):
        file_qid = file_name.split('.')[0]
        if file_qid not in qids:
            continue
        structures = read_structure_file(input_file + file_name)
        print(file_name)
        best_f1 = 0
        for structure in structures:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    if best_f1 < candidate.f1_score:
                        best_f1 = candidate.f1_score
        best_f1_by_qid[file_qid] = best_f1
    write_json(best_f1_by_qid, 'qid_f1.json')
Beispiel #18
0
def run_grounding_graph_add_question_match(input_file_folder):
    '''
    Add the question-match score to every grounded graph of every file.

    For each structure file in ``input_file_folder`` the ``total_score`` of
    every grounded graph is set to its existing ``score`` plus the value that
    ``QuestionMatchInterface.get_score`` returns for the question id and the
    graph's denotation. Each file is rewritten in place.

    :param input_file_folder: directory prefix; it is concatenated directly
        with the file names, so it presumably ends with a path separator.
    :return: None
    '''
    file_names = os.listdir(input_file_folder)
    from grounding.ranking.path_match_sentence_level.question_match_interface import QuestionMatchInterface
    matcher = QuestionMatchInterface()
    for file_name in file_names:
        print(file_name)
        target_file = input_file_folder + file_name
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    candidate.total_score = candidate.score + matcher.get_score(
                        qid, candidate.denotation)
        # Overwrite the file that was just read.
        write_structure_file(structure_list, target_file)
Beispiel #19
0
def run_grounding_graph_guiyi_add_question_match(input_file_folder):
    '''
    Replace the score of every grounded graph by its question-match score.

    For each structure file in ``input_file_folder`` the current scores of all
    grounded graphs are collected and passed through ``utils.Normalize``, then
    every grounded graph's ``score`` is overwritten with the value returned by
    ``QuestionMatchInterface.get_score`` (positive values are printed). Each
    file is rewritten in place.

    NOTE(review): the normalised-score lookup table is built but not read by
    the active code path; it only served a disabled "word match + question
    match" variant that added the normalised score to the match score.

    :param input_file_folder: directory prefix; it is concatenated directly
        with the file names, so it presumably ends with a path separator.
    :return: None
    '''
    import os
    from common import utils
    from grounding.ranking.path_match_nn.question_match_interface import QuestionMatchInterface
    matcher = QuestionMatchInterface()
    for file_name in os.listdir(input_file_folder):
        print(file_name)
        target_file = input_file_folder + file_name
        structure_list = read_structure_file(target_file)

        # Gather the raw scores of all candidates in this file.
        raw_scores = [
            candidate.score
            for structure in structure_list
            for ungrounded_graph in structure.ungrounded_graph_forest
            for candidate in ungrounded_graph.get_grounded_graph_forest()
        ]
        normalized_scores = utils.Normalize(raw_scores)
        # raw score -> normalised score (currently unused, see docstring).
        normalized_by_raw = {
            raw: normalized_scores[position]
            for position, raw in enumerate(raw_scores)
        }

        for structure in structure_list:
            qid = structure.qid
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    # Active mode: question match only, stored in `score`.
                    candidate.score = matcher.get_score(
                        qid, candidate.denotation)
                    if candidate.score > 0:
                        print('\t\t', candidate.score)
        write_structure_file(structure_list, target_file)
Beispiel #20
0
def run_candidate_graph_generation(structure_with_1_ungrounded_lcquad_file, output_file, q_mode='lcquad'):
    '''
    Expand 2.1 grounded graphs into candidate graphs and write one file per question.

    For every structure whose result file does not exist yet, only the last
    ungrounded graph is processed: its first 2.1 grounded graph is expanded by
    the ``graph_2_1_to_2_2_ir`` generator matching ``q_mode``, every resulting
    graph receives a ``grounded_query_id``, a ``sparql_query`` and a
    ``denotation``, and the structure is written to
    ``output_file + '<qid>.json'`` when at least one candidate was produced.

    :param structure_with_1_ungrounded_lcquad_file: structure file to read.
    :param output_file: output directory prefix; it is concatenated directly
        with the file name, so it presumably ends with a path separator.
    :param q_mode: 'graphq', 'cwq' or 'lcquad'; any other value produces no
        candidates (kept silent, as before).
    :return: None; the qids whose expansion raised are printed at the end.
    '''
    from method_ir.grounding import graph_2_1_to_2_2_ir
    from method_sp.grounding import grounded_graph_to_sparql
    from method_sp.grounding import sparql_to_denotation
    import os
    # Name of the generator per mode. The attribute itself is resolved lazily
    # inside the try block so a missing generator is reported per question
    # exactly like any other expansion error.
    generator_name = {
        'graphq': 'get_oracle_graphs_by_2_1_graph_graphq',
        'cwq': 'get_oracle_graphs_by_2_1_graph_cwq',
        'lcquad': 'get_oracle_graphs_by_2_1_graph_lcquad',
    }.get(q_mode)
    structure_list = read_structure_file(structure_with_1_ungrounded_lcquad_file)
    error_qid_list = []
    for structure in structure_list:
        result_name = str(structure.qid) + '.json'
        # Skip questions that already have a result. A single existence check
        # replaces listing the whole output directory for every question.
        if os.path.exists(os.path.join(output_file, result_name)):
            continue
        print(structure.qid)
        compositionality_type = structure.compositionality_type
        last_index = len(structure.ungrounded_graph_forest) - 1
        for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
            if j != last_index:
                continue
            grounded_graph_forest = []
            for _2_1_grounded_graph in ungrounded_graph.get_grounded_graph_forest():
                try:
                    if generator_name is not None:
                        generate = getattr(graph_2_1_to_2_2_ir, generator_name)
                        grounded_graph_forest.extend(
                            generate(_2_1_grounded_graph=_2_1_grounded_graph,
                                     qtype=compositionality_type))
                except Exception as e:
                    # Best effort: report, drop partial results, remember the qid.
                    print('#Error:', structure.qid, e)
                    grounded_graph_forest.clear()
                    error_qid_list.append(structure.qid)
                # NOTE(review): only the first 2.1 grounded graph is expanded;
                # this unconditional break is preserved from the original.
                break
            for z, grounded_graph in enumerate(grounded_graph_forest):
                grounded_graph.grounded_query_id = ungrounded_graph.ungrounded_query_id * 100000 + z
                grounded_graph.sparql_query = grounded_graph_to_sparql.grounded_graph_to_sparql(
                    grounded_graph=grounded_graph,
                    q_function=structure.function,
                    q_compositionality_type=structure.compositionality_type,
                    q_mode=q_mode)
                grounded_graph.denotation = sparql_to_denotation.set_denotation(
                    grounded_graph=grounded_graph,
                    q_compositionality_type=structure.compositionality_type)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_forest)
            print('#size:\t', len(grounded_graph_forest))
            if len(grounded_graph_forest) > 0:
                write_structure_file([structure], output_file + result_name)
    print('Error qid list:', error_qid_list)
def computed_every_grounded_graph_f1_lcquad(input_file):
    '''
    Score every grounded graph of every structure file against the gold answers.

    The gold answers are looked up by question text through
    ``lcquad_1_0_interface.get_answers_by_question`` (e.g.
    ['http://dbpedia.org/resource/Colorado']). The result of
    ``sempre_evaluation.computeF1`` is unpacked as (recall, precision, f1)
    and stored on the grounded graph as ``recall_score``, ``precision_score``
    and ``f1_score``. Each file is rewritten in place.

    :param input_file: directory prefix; it is concatenated directly with the
        file names, so it presumably ends with a path separator.
    :return: None
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    for file_name in os.listdir(input_file):
        target_file = input_file + file_name
        print(file_name)
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            gold_answers = lcquad_1_0_interface.get_answers_by_question(
                structure.question)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    predicted = set(candidate.denotation)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, predicted)
                    candidate.f1_score = f1
                    candidate.recall_score = recall
                    candidate.precision_score = precision
        write_structure_file(structure_list, target_file)
Beispiel #22
0
def computed_every_grounded_graph_f1_webq_mid(input_file, answer_file):
    '''
    Store the f1 score of every grounded graph, with gold answers looked up by qid.

    Gold answers come from ``evaluation_utils.search_for_answers_by_id`` and
    the third value returned by ``sempre_evaluation.computeF1`` is written to
    ``f1_score``. Each file is rewritten in place.

    NOTE(review): ``answer_file`` is never read and ``qid_to_answers_dict`` is
    not defined inside this function; unless it exists at module level this
    raises NameError. The step that loads ``answer_file`` into that dict
    appears to be missing - confirm against the original project.

    :param input_file: directory prefix; it is concatenated directly with the
        file names, so it presumably ends with a path separator.
    :param answer_file: currently unused (see note above).
    :return: None
    '''
    for file_name in os.listdir(input_file):
        target_file = input_file + file_name
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            gold_answers = evaluation_utils.search_for_answers_by_id(
                structure.qid, qid_to_answers_dict)
            print(file_name, gold_answers)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    predicted = set(candidate.denotation)
                    _recall, _precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, predicted)
                    candidate.f1_score = f1
        write_structure_file(structure_list, target_file)
Beispiel #23
0
def computed_every_grounded_graph_f1_cwq(input_file):
    '''
    Store the f1 score of every grounded graph against the structure's gold answer.

    Gold answers are derived from ``structure.gold_answer`` through
    ``evaluation_utils.get_gold_answers``; the third value returned by
    ``sempre_evaluation.computeF1`` is written to ``f1_score`` and printed
    when it is positive. Each file is rewritten in place.

    :param input_file: directory prefix; it is concatenated directly with the
        file names, so it presumably ends with a path separator.
    :return: None
    '''
    for file_name in os.listdir(input_file):
        target_file = input_file + file_name
        print(file_name)
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            gold_answers = evaluation_utils.get_gold_answers(
                structure.gold_answer)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    predicted = set(candidate.denotation)
                    _recall, _precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, predicted)
                    candidate.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, target_file)
def investigate_denotation_same():
    '''
    Print the question-match denotations of the test questions and dump them.

    Every file below ``output_path + '/2.2_test_span_transfer_wo_wordlevel/'``
    whose id ('test_' + file-name stem) occurs in
    'testqid_trainqid_bertmax.json' is read. For each ungrounded graph the
    nodes of its first grounded graph (an empty list when there is none) are
    passed to ``qmi.get_denotation_by_testqid_nodes_freebase`` and the
    resulting set is printed. Finally
    ``qmi.testqid_correspondingtrainqid_denotations`` is written to
    'testqid_correspondingtrainqid_denotations.json'.

    A former comparison of the denotation with the gold answer ids is
    disabled and not reproduced here.

    :return: None
    '''
    testqid_trainqid_bertmax = read_json(data_question_match +
                                         'testqid_trainqid_bertmax.json')
    qmi = QuestionMatchInterface()
    structure_2_2_files = '/2.2_test_span_transfer_wo_wordlevel/'
    folder = output_path + structure_2_2_files
    for file_name in os.listdir(folder):
        print(file_name)
        test_qid = 'test_' + str(file_name.split('.')[0])
        if test_qid not in testqid_trainqid_bertmax:
            continue
        for structure in read_structure_file(folder + file_name):
            for ungrounded_graph in structure.ungrounded_graph_forest:
                # Only the first grounded graph contributes its nodes.
                nodes = []
                for first_candidate in ungrounded_graph.get_grounded_graph_forest():
                    nodes = first_candidate.nodes
                    break
                found = qmi.get_denotation_by_testqid_nodes_freebase(
                    test_qid, nodes)
                denotation = set(found)
                print('denotations:', denotation)
    write_json(
        qmi.testqid_correspondingtrainqid_denotations,
        data_question_match + 'testqid_correspondingtrainqid_denotations.json')
def generate_testset():
    '''
    Build every (test abstract question, train abstract question) pair.

    Train abstract questions are those of all qids listed in
    'train_predicate_qids.json' that have an entry in
    'qid_abstractquestion.json'; test abstract questions are those of the
    structures in ``test_structure_with_2_1_grounded_graph_file``, looked up
    under 'test_' + qid. The cross product of the two sets is written as a
    list of two-element lists to 'testset.json'.

    :return: None
    '''
    test_2_1 = read_structure_file(test_structure_with_2_1_grounded_graph_file)
    train_predicate_qids = read_json(data_question_match +
                                     'train_predicate_qids.json')
    qid_abstractquestions = read_json(data_question_match +
                                      'qid_abstractquestion.json')
    train_abstractquestion = {
        qid_abstractquestions[qid]
        for predicate in train_predicate_qids
        for qid in train_predicate_qids[predicate]
        if qid in qid_abstractquestions
    }
    test_abstractquestions = {
        qid_abstractquestions['test_' + str(one.qid)]
        for one in test_2_1
        if 'test_' + str(one.qid) in qid_abstractquestions
    }
    testset = [
        [test_question, train_question]
        for test_question in test_abstractquestions
        for train_question in train_abstractquestion
    ]
    write_json(testset, data_question_match + 'testset.json')
Beispiel #26
0
def computed_every_grounded_graph_f1_lcquad(input_file):
    '''
    Store the f1 score of every grounded graph for the LC-QuAD structure files.

    The gold answers are looked up by question text through
    ``lcquad_1_0_interface.get_answers_by_question`` and printed; the third
    value returned by ``sempre_evaluation.computeF1`` is written to
    ``f1_score`` and printed when it is positive. Each file is rewritten in
    place.

    :param input_file: directory prefix; it is concatenated directly with the
        file names, so it presumably ends with a path separator.
    :return: None
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    for file_name in os.listdir(input_file):
        target_file = input_file + file_name
        print(file_name)
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            gold_answers = lcquad_1_0_interface.get_answers_by_question(
                structure.question)
            print('#gold answer:\t', gold_answers)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    predicted = set(candidate.denotation)
                    _recall, _precision, f1 = sempre_evaluation.computeF1(
                        gold_answers, predicted)
                    candidate.f1_score = f1
                    if f1 > 0:
                        print(f1)
        write_structure_file(structure_list, target_file)
Beispiel #27
0
def compute_all_questions_recall(input_file):
    '''
    Print the best f1 score per structure file and the sum over all files.

    For each file in ``input_file`` the largest ``f1_score`` among all of its
    grounded graphs is determined (0 when none is higher), printed next to
    the file name and added to a running total. The total and the number of
    files are printed at the end.

    :param input_file: directory prefix; it is concatenated directly with the
        file names, so it presumably ends with a path separator.
    :return: None
    '''
    file_names = os.listdir(input_file)
    total = 0
    for file_name in file_names:
        structure_list = read_structure_file(input_file + file_name)
        best_f1 = 0
        for structure in structure_list:
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for candidate in ungrounded_graph.get_grounded_graph_forest():
                    if best_f1 < candidate.f1_score:
                        best_f1 = candidate.f1_score
        total += best_f1
        print(('%s\t%s') % (file_name, str(best_f1)))
    print(total, len(file_names))
def generate_paths_graphq_interface_from_lcquad_el(
        structure_with_2_1_grounded_graph_file):
    '''
    Print qid, question type and topic entities for every structure.

    The topic entities of each question are obtained from
    ``lcquad_1_0_interface.get_topic_entities_list_by_question_from_nn``.
    A question with one entity is printed as 'composition', one with two
    entities as 'conjunction'; any other count prints nothing. The qids for
    which anything in this step raised are collected and printed at the end.

    :param structure_with_2_1_grounded_graph_file: structure file to read.
    :return: None
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    structure_list = read_structure_file(
        structure_with_2_1_grounded_graph_file)
    label_by_entity_count = {1: 'composition', 2: 'conjunction'}
    error_qid_list = []
    for structure in structure_list:
        try:
            entities_list = lcquad_1_0_interface.get_topic_entities_list_by_question_from_nn(
                structure.question)
            new_entities_list = [[entity, 'entity'] for entity in entities_list]
            label = label_by_entity_count.get(len(entities_list))
            if label is not None:
                print(('%s\t%s\t%s') %
                      (structure.qid, label, str(new_entities_list)))
        except Exception:
            # Best effort: remember the failing question and carry on.
            error_qid_list.append(structure.qid)
    print('#error:\t', error_qid_list)
Beispiel #29
0
def run_grounding_graph_score12_match(input_file_folder, q_mode='lcquad'):
    """
    Assign a path-match score to the candidates of the last ungrounded graph.

    For every structure file in ``input_file_folder`` the grounded graphs of
    the last ungrounded graph of each structure are scored with
    ``PathMatchScore12.set_bert_score_score12`` and the returned values are
    stored pairwise in ``score``. If scoring raises, every candidate of that
    graph gets the score 0.0 and 'error' is printed. Each file is rewritten
    in place and 'over' is printed at the end.

    :param input_file_folder: directory prefix; it is concatenated directly
        with the file names, so it presumably ends with a path separator.
    :param q_mode: passed to the ``PathMatchScore12`` constructor.
    :return: None
    """
    from method_ir.grounding.path_match_score12.path_match_interface import PathMatchScore12
    scorer = PathMatchScore12(q_mode)
    for file_name in os.listdir(input_file_folder):
        target_file = input_file_folder + file_name
        print(file_name)
        structure_list = read_structure_file(target_file)
        for structure in structure_list:
            question = structure.question
            last_index = len(structure.ungrounded_graph_forest) - 1
            for j, ungrounded_graph in enumerate(structure.ungrounded_graph_forest):
                if j != last_index:
                    continue
                candidates = ungrounded_graph.get_grounded_graph_forest()
                try:
                    bert_scores = scorer.set_bert_score_score12(
                        question_normal=question,
                        grounded_graph_forest_list=candidates)
                    for candidate, bert_score in zip(candidates, bert_scores):
                        candidate.score = bert_score
                except Exception:
                    # Fall back to a neutral score for the whole graph.
                    for candidate in candidates:
                        candidate.score = 0.0
                    print('error')
        write_structure_file(structure_list, target_file)
    print('over')
Beispiel #30
0
def run_grounded_node_grounding_dbpedia(structure_with_ungrounded_graphq_file,
                                        output_file,
                                        linking_is_gold=False):
    '''
    function: 1.0 ungrounded query  ->  2.1 grounded query (DBpedia)

    Only the last ungrounded graph of each structure is processed. Every node
    of that graph is paired with a linking result: the gold lookup by question
    and node mention when ``linking_is_gold`` is set, otherwise the result of
    ``node_linking_interface_dbpedia.node_linking``. The pairs are stored on
    the ungrounded graph together with the grounded graphs generated from
    them, and all structures are written to ``output_file``.

    :param structure_with_ungrounded_graphq_file: structure file to read.
    :param output_file: file the updated structures are written to.
    :param linking_is_gold: use the gold entity linking instead of the linker.
    :return: None
    '''
    from datasets_interface.question_interface import lcquad_1_0_interface
    from method_sp.grounding._2_1_grounded_graph.node_linking import node_linking_interface_dbpedia
    from method_sp.grounding._2_1_grounded_graph.grounded_graph_2_1_generation import generate_grounded_graph_interface
    structure_list = read_structure_file(structure_with_ungrounded_graphq_file)
    for structure in structure_list:
        print(structure.qid)
        forest = structure.get_ungrounded_graph_forest()
        last_index = len(forest) - 1
        for i, ungrounded_graph in enumerate(forest):
            if i != last_index:
                continue
            grounding_result_list = []
            for node in ungrounded_graph.nodes:
                if linking_is_gold:
                    linked = lcquad_1_0_interface.get_topic_entities_by_question_and_mention(
                        question=structure.question,
                        mention=node.friendly_name)
                else:
                    linked = node_linking_interface_dbpedia.node_linking(
                        node=node)
                grounding_result_list.append((node, linked))
            grounded_graph_list = generate_grounded_graph_interface(
                ungrounded_graph=ungrounded_graph,
                grounding_result_list=grounding_result_list)
            ungrounded_graph.set_grounded_linking(grounding_result_list)
            ungrounded_graph.set_grounded_graph_forest(grounded_graph_list)
    write_structure_file(structure_list, output_file)