Code example #1
File: kbcqa_evaluation.py    Project: yayuanzi8/SPARQA
def computed_every_grounded_graph_f1_webq_name(input_file, answer_file,
                                               mid_to_names_file):
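    """Compute a per-grounded-graph F1 score on WebQuestions.

    Reads a qid -> gold-answer mapping and a mid -> names mapping, then, for
    every structure file under `input_file`, converts each grounded graph's
    denotation mids into names, scores them against the gold answers with
    SEMPRE's F1, stores the score on the graph, and writes the structures back.
    """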
    # from datasets_interface.freebase import webquestions_interface
    # from evaluation.webq_denotation import webq_mid_to_names_process
    #------------------------------------------------
    #read qid-to-answers
    qid_to_answers_dict = dict()
    lines = read_list(answer_file)
    for line in lines:
        cols = line.split('\t')
        qid_to_answers_dict[cols[0]] = eval(cols[2])
    #------------------------------------------------
    # mid to names dict
    mid_to_names_dict = dict()
    lines = read_list(mid_to_names_file)
    for line in lines:
        cols = line.split('\t')
        mid = cols[1]
        names = list(eval(cols[2]))
        mid_to_names_dict[mid] = names
    #------------------------------------------------
    all_structure_path = os.listdir(input_file)
    for structure_path in all_structure_path:
        structure_with_grounded_graphq_file = input_file + structure_path
        structure_list = read_structure_file(
            structure_with_grounded_graphq_file)
        for structure in structure_list:
            qid = structure.qid
            gold_answer_names_set = evaluation_utils.search_for_answers_by_id(
                qid, qid_to_answers_dict)

            print(structure_path, '#gold:\t', gold_answer_names_set)
            for ungrounded_graph in structure.ungrounded_graph_forest:
                for grounded_graph in ungrounded_graph.get_grounded_graph_forest(
                ):
                    system_denotation_names_set = set()
                    for denotation_mid in grounded_graph.denotation:
                        denotation_name = evaluation_utils.get_name_by_mid(
                            denotation_mid, mid_to_names_dict)
                        print('###denotation:\t', denotation_mid,
                              denotation_name)
                        if denotation_name is not None:
                            system_denotation_names_set.add(denotation_name)
                        else:
                            print(denotation_mid, '#####error!!!',
                                  denotation_name)
                    print('#gold:\t', gold_answer_names_set, '#system:\t',
                          system_denotation_names_set)
                    recall, precision, f1 = sempre_evaluation.computeF1(
                        gold_answer_names_set, system_denotation_names_set)
                    if f1 > 0.0:
                        print('#result:\t', f1)
                    grounded_graph.f1_score = f1
        write_structure_file(structure_list, input_file + structure_path)
Code example #2
def get_all_relation_domain_range():
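    """Look up the domain and range of every Freebase relation and log one
    `relation<TAB>domains<TAB>ranges` line per relation; relations that raise
    an exception are collected and printed at the end."""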
    human_relation_list = read_list(
        '../dataset/resources_cwq/dataset_freebase_latest/freebase_relations')
    error_qid_list = []
    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.03.20_freebase_relation_domain_range.txt',
                        sys.stdout)

    for line in human_relation_list:
        try:
            domains_set = get_domain(line)
            range_set = get_range(line)
            print(('%s\t%s\t%s') % (line, list(domains_set), list(range_set)))

            # sparql = 'PREFIX : <http://rdf.freebase.com/ns/> SELECT count(distinct ?s) WHERE { ?s :'+line+' ?o}'
            # count_relation = kb_interface.execute_sparql(sparql)
            # print(('%s\t%d') % (line, count_relation.pop()))
            # names = kb_interface.get_names(line)
            # if len(names) > 0:
            #     name = names.pop()
            #     token_list = name.lower().split(' ')
            #     print(('%s\t%s') % (line, '\t'.join(token_list)))
        except Exception as e:
            error_qid_list.append(line)
        # if line not in human_types_list:
        #     print(line)
    print(error_qid_list)
Code example #3
def get_freebase_schema():
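    """Emit a Freebase schema listing: for every type, print one
    `type<TAB>main|mediator<TAB>relation<TAB>range` line for each relation
    whose domain contains that type."""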
    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.05.13_freebase_schema.txt', sys.stdout)
    # types_list = read_list('../dataset/resources_cwq/dataset_freebase_latest/freebase_types')
    # mediators = read_list('../dataset/resources_cwq/dataset_freebase_latest/mediators.tsv')
    # relation_domain_range_list = read_list('../dataset/resources_cwq/dataset_freebase_latest/freebase_relations_domain_range')
    types_list = read_list('./20190512_ywsun/75_all_classes.txt')
    mediators = read_list('./20190512_ywsun/mediators.tsv')
    relation_domain_range_list = read_list(
        './20190512_ywsun/2019.05.12_properties_with_domain_range.txt')

    relation_domain_range_tuple_list = []
    for relation_domain_range in relation_domain_range_list:
        cols = relation_domain_range.split('\t')
        relation = cols[0]
        domains_list = eval(cols[1])
        ranges_list = eval(cols[2])
        relation_domain_range_tuple_list.append(
            (relation, domains_list, ranges_list))

    for type_ in types_list:
        attr = 'main'
        if type_ in mediators:
            attr = 'mediator'
        related_relation_range_list = []
        for i, (relation, domains_list, ranges_list) in \
                enumerate(relation_domain_range_tuple_list):
            if type_ in domains_list:
                related_relation_range_list.append((relation, ranges_list))
        for related_relation_range in related_relation_range_list:
            range_str = ''  # renamed from `range`, which shadows the builtin
            if len(related_relation_range[1]) > 0:
                range_str = related_relation_range[1][0]
            if len(related_relation_range[1]) > 1:
                print('error!!!', related_relation_range)
            print(('%s\t%s\t%s\t%s') %
                  (type_, attr, related_relation_range[0], range_str))
Code example #4
def instance_to_types():
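    """Invert a `type<TAB>instance` listing into an instance -> set-of-types
    mapping and log one `instance<TAB>types` line per instance."""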
    instance_to_types_dict = dict()
    types_instance_list = read_list('./2019_03_15_freebase_instance_type_1')
    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.03.20_freebase_instance_type_1_reverse.txt',
                        sys.stdout)

    for i, line in enumerate(types_instance_list):
        terms = line.split('\t')
        type_str = terms[0]
        instance = terms[1]
        # setdefault replaces the original membership-test-and-branch
        instance_to_types_dict.setdefault(instance, set()).add(type_str)
    for instance, types in instance_to_types_dict.items():
        print(('%s\t%s') % (instance, str(types)))
Code example #5
def notable_type_to_instances():
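    """Log one `notable_type<TAB>instance` line for every instance of every
    Freebase notable type; failing types are collected and printed."""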

    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.03.15_freebase_instance_notable_1.txt',
                        sys.stdout)

    error_qid_list = []
    notable_types_types_list = read_list(
        '../dataset/resources_cwq/dataset_freebase_latest/freebase_notable_types'
    )
    for line in notable_types_types_list:
        try:
            instances = get_instance_by_class_notable_type(line)
            for instance in instances:
                print(('%s\t%s') % (line, instance))
        except Exception as e:
            error_qid_list.append(line)
        # if line not in human_types_list:
        #     print(line)
    print(error_qid_list)
Code example #6
def mediator_to_instances():
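    """For every mediator class (minus a hard-coded skip list of very large
    ones), run a SPARQL query retrieving all (?s, ?p, ?instance) triples whose
    ?instance has that class; failing classes are collected and printed."""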
    mediators_list = read_list(
        '../dataset/resources_cwq/dataset_freebase_latest/mediators.tsv')
    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.04.10_freebase_mediators_instance_sp.txt',
                        sys.stdout)
    error_qid_list = []

    for i, line in enumerate(mediators_list):
        if line in [
                'common.notable_for', 'medicine.drug_label_section',
                'location.geocode', 'film.performance',
                'measurement_unit.dated_percentage',
                'base.schemastaging.nutrition_information', 'common.webpage',
                'music.track_contribution', 'measurement_unit.dated_integer'
        ]:
            continue
        try:
            sparql = '''SELECT DISTINCT ?s ?p ?instance WHERE {
            ?s ?p ?instance  . 
            ?instance :type.object.type :''' + line + '''
            }'''
            execute_sparql_three_args(sparql, line)

            # instances = get_instance_by_class(line)
            # for instance in instances:
            #     p_o_set, _, _ = get_p_o_by_entity(instance)
            #     for p_o in p_o_set:
            #         print(('%d\t%s\t%s') % (i, instance, p_o))

        except Exception as e:
            error_qid_list.append(line)
        # if line not in human_types_list:
        #     print(line)
    print(error_qid_list)
Code example #7
def get_all_class_names():
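    """Map lower-cased Freebase type names back to their type ids and dump the
    resulting name -> {type_id: 1.0} dictionary to ./types_reverse.txt."""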
    def write_dict(mapping, write_file):
        # `mapping` was originally named `dict`, which shadows the builtin
        with open(write_file, "w", encoding="utf-8") as fi:
            # fi.write(str(len(mapping)))
            # fi.write("\n")
            for key in mapping:
                fi.write(str(key))
                fi.write("\t")
                fi.write(str(mapping[key]))
                fi.write("\n")

    # human_types_list = read_list('../dataset/resources_cwq/dataset_freebase_latest/freebase_types')
    human_types_list = read_list('./freebase_types')
    error_qid_list = []
    name_to_class_dict = OrderedDict()
    for i, line in enumerate(human_types_list):
        try:
            names = get_names(line)
            if len(names) > 0:
                name = names.pop().lower()
                if name in name_to_class_dict:
                    name_to_class_dict[name][line] = 1.0
                else:
                    class_dict = dict()
                    class_dict[line] = 1.0
                    name_to_class_dict[name] = class_dict
                print(i, name)
                # token_list = name.lower().split(' ')
                # print(('%s\t%s') % (line, '\t'.join(token_list)))
        except Exception as e:
            error_qid_list.append(line)
        # if line not in human_types_list:
        #     print(line)
    print('#error:\t', error_qid_list)
    write_dict(name_to_class_dict, './types_reverse.txt')
Code example #8
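# NOTE: this example is truncated in the source listing. The import, the
# EvaluationCounts record, the function name, and the counter initialisation
# below are an assumed reconstruction, not verbatim SPARQA code.
from collections import namedtuple

EvaluationCounts = namedtuple(
    'EvaluationCounts', ['truePositives', 'falsePositives', 'falseNegatives'])


def count_matches(goldList, predictedList):  # hypothetical name
    truePositives = falsePositives = falseNegatives = 0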
    for gold in goldList:
        if gold in predictedList:
            truePositives += 1
        else:
            falseNegatives += 1
    for predicted in predictedList:
        if predicted not in goldList:
            falsePositives += 1
    return EvaluationCounts(truePositives=truePositives,
                            falsePositives=falsePositives,
                            falseNegatives=falseNegatives)


if __name__ == '__main__':

    lines = read_list('./sample_el_q_result.txt')
    q_to_system_answer_dict = dict()
    for line in lines:
        cols = line.split('\t')
        q_to_system_answer_dict[cols[1]] = eval(cols[3])

    gold_lines = read_list('./sample_gold_q_result.txt')
    q_to_gold_answer_dict = dict()
    for gold_line in gold_lines:
        cols = gold_line.split('\t')
        q_to_gold_answer_dict[cols[0]] = eval(cols[1])

    counts = []
    for q, goldList in q_to_gold_answer_dict.items():
        predictedList = []
        if q in q_to_system_answer_dict:
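            predictedList = q_to_system_answer_dict[q]
        # assumed continuation -- the source listing is cut off here
        counts.append(count_matches(goldList, predictedList))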
Code example #9
def type_to_instances():
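    """Log one `type<TAB>instance` line for every instance of every Freebase
    type, skipping three hard-coded filter lists of noisy, meta, or
    user-defined types; failing types are collected and printed at the end."""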
    # mediators_list = read_list('../dataset/resources_cwq/dataset_freebase_latest/mediators.tsv')
    human_types_list = read_list(
        '../dataset/resources_cwq/dataset_freebase_latest/freebase_types')

    import sys
    from parsing.logger_test import Logger
    sys.stdout = Logger('./2019.03.15_freebase_instance_type_1.txt',
                        sys.stdout)
    error_qid_list = []

    filter_list_3 = [
        'music.recording', 'music.release_track',
        'base.type_ontology.abstract', 'base.type_ontology.non_agent',
        'common.notable_for', 'common.topic'
    ]
    filter_list_2 = [
        'type.content_import', 'type.content', 'type.namespace',
        'common.document', 'base.type_ontology.agent',
        'base.type_ontology.inanimate', 'base.type_ontology.animate'
    ]

    filter_list_4 = [
        'user.joram.environmental_science_$0026_technology.water_quality',
        'user.rogopag.www$002ecittadiivrea$002ecom.topic',
        'user.player.player_entertainment_group_inc$002e.branding',
        'user.sankeerth.http$003a$002f$002fwebisee$002ecom.topic',
        'user.player.player_entertainment_group_inc$002e.televisions_production',
        'user.player.player_entertainment_group_inc$002e.visual_art',
        'user.robert.world$0027s_tallest.topic',
        'user.rial13.dre_$0022rial$0022_porcher.topic',
        'user.ray315.$0432$0430$043b$044e$0442$0430.topic',
        'user.bluenorthernmusic.musical_artist$002c_music_lessons.topic',
        'user.mad_god.$0418$0441$043a$0443$0441$0441$0442$0432$0435$043d$043d$044b$0439_$0438$043d$0442$0435$043b$043b$0435$043a$0442.topic',
        'user.player.player_entertainment_group_inc$002e.games',
        'user.dreig.web_3$002e0.topic',
        'user.beatyourprice.http$003a$002f$002fwww$002ebeatyourprice$002ecom.topic',
        'user.brabblejr.www$002ebrabble$002ccom.topic',
        'user.player.player_entertainment_group_inc$002e.concerts',
        'user.player.player_entertainment_group_inc$002e.media_common',
        'user.shomoa.magic$003a_the_gathering.subtype',
        'user.mad_god.$0418$0441$043a$0443$0441$0441$0442$0432$0435$043d$043d$044b$0439_$0438$043d$0442$0435$043b$043b$0435$043a$0442.ai',
        'user.shomoa.magic$003a_the_gathering.color',
        'user.gadgetsgalore.www$002er4us$002ecom$002ftrophy.topic',
        'user.player.player_entertainment_group_inc$002e.film',
        'user.robert.world$0027s_tallest.building',
        'user.shomoa.magic$003a_the_gathering.x_type',
        'user.xiongy.$4e2d$56fd.x', 'user.hsetty.web2$002e0.topic',
        'user.rogopag.robanostra$002ehomeftp$002enet.topic',
        'user.freedom2002.$00e2$1ea11ea1c.topic',
        'user.integrity19.taxation_and_pornography$003a_designing_system_to_survive_constitutional_challenges.topic',
        'user.visha.$0645$062d$0645$062f_$062d$0645$06cc$062f_$0634$0627$06be$062f.topic',
        'user.shomoa.magic$003a_the_gathering.card',
        'user.player.player_entertainment_group_inc$002e.entertainment_company',
        'user.rrhobbs.location_scouting$002c_location_management_and_locations_for_film$002c_tv$002c_photo_and_events.topic',
        'user.player.player_entertainment_group_inc$002e.topic',
        'user.shomoa.magic$003a_the_gathering.supertype',
        'user.paulsipot.www$002eunnamedservice$002ecom.topic',
        'user.shomoa.magic$003a_the_gathering.topic',
        'user.zameen.ringtones$002emobi.topic',
        'user.archbishopderrickyoung.archbishop_derrick_l$002e_young_d$002ed$002e$002c_d$002emin$002e.topic',
        'user.player.player_entertainment_group_inc$002e.computer_game_designer',
        'user.xiongy.$4e2d$56fd.topic',
        'user.shomoa.magic$003a_the_gathering.zone',
        'user.player.player_entertainment_group_inc$002e.product_integration',
        'user.saranshsehgal.www$002emcllo$002ecom.topic',
        'user.funkyflash.www$002edujdc$002eorg.topic',
        'user.player.player_entertainment_group_inc$002e.game_development',
        'user.player.player_entertainment_group_inc$002e.tv_program',
        'user.chiliteslegacy.default_domain.the_chi_lites_bass_singer_creadel_jones_had_a_son_darren_in_which_played_a_important_role_in_helping_protect_his_legacy_against_fraud_exploition_and_embelzelments_to_creadel_jones_singer_legacy_and_his_music_his_son_darren_cubie_has_been_a_force_of_truth_and_guidence_for_iconic_legacies_an_thier_futher_darren_has_made_wed_sites_for_the_news_of_legacy_through_out_the_entertainment_field_that_mistreated_by_abuse_and_for_news_related_and_music_to_legendary_artist_icons_and_music_called_http_www_chilites_ning_com_and_http_www_chilites_net_all_are_real_disscussion_stating_information_music_abuse_and_news_and_music_creadel_jones_family_includes_wife_deborah_jones_and_two_sisters',
        'user.joram.environmental_science_$0026_technology.topic',
        'user.player.player_entertainment_group_inc$002e.computer_games',
        'user.mirzak2.www$002emirzak2$002ewebs$002ecom.topic',
        'user.pasidor.pasidor$002ecom.topic',
        'user.imteam1.http$003a$002f$002fwww$002egreenconservationproducts$002ecom$002f.topic',
        'user.player.player_entertainment_group_inc$002e.arts_entertainment',
        'user.rogopag.www$002enastypixel$002ecom.topic',
        'user.kunninmindzradio.http$003a$002f$002fkunninmindz$002ecom.topic'
    ]

    for line in human_types_list:
        if line in filter_list_2 or line in filter_list_3 or line in filter_list_4:
            continue
        try:
            instances = get_instance_by_class(line)
            for instance in instances:
                print(('%s\t%s') % (line, instance))
        except Exception as e:
            error_qid_list.append(line)
        # if line not in human_types_list:
        #     print(line)
    print(error_qid_list)
Code example #10
from common import hand_files
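# (excerpt: `os`, `globals_args`, and `grounding_utils` are assumed to be
# imported near the top of the original file, alongside hand_files)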

q_mode = globals_args.argument_parser.q_mode

# 2.2 args
if q_mode == 'cwq':
    oracle_file_root = globals_args.fn_cwq_file.grounded_graph_file + 'result/'
    oracle_all_files_path_names = os.listdir(oracle_file_root)
    literal_to_id_map = grounding_utils.read_literal_to_id_map(
        file_root=globals_args.fn_cwq_file.grounded_graph_file)
    kb_relations = hand_files.read_set(
        globals_args.kb_freebase_latest_file.freebase_relations_file)

    mediators_instances_set = hand_files.read_set(
        globals_args.kb_freebase_latest_file.mediators_instances_file)
    schema_lines_list = hand_files.read_list(
        globals_args.kb_freebase_latest_file.schema_file)
    property_reverse_dict = hand_files.read_dict(
        globals_args.kb_freebase_latest_file.freebase_reverse_property)
    literal_property_dict = hand_files.read_dict(
        globals_args.kb_freebase_latest_file.freebase_literal_property)

elif q_mode == 'graphq':
    oracle_file_root = globals_args.fn_graph_file.grounded_graph_file + 'result/'
    oracle_all_files_path_names = os.listdir(oracle_file_root)
    literal_to_id_map = grounding_utils.read_literal_to_id_map(
        file_root=globals_args.fn_graph_file.grounded_graph_file)
    kb_relations = hand_files.read_set(
        globals_args.kb_freebase_en_2013.freebase_relations_file)

    mediators_instances_set = hand_files.read_set(
        globals_args.kb_freebase_en_2013.mediators_instances_file)
Code example #11
File: parsing_args.py    Project: yayuanzi8/SPARQA
from parsing.nltk_nlp_utils import NLTK_NLP
from common import globals_args
from common import hand_files
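# (excerpt: `BertArgs` and `SUTime` are assumed to be imported earlier in the
# original parsing_args.py)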

parser_mode = globals_args.parser_mode
wh_words_set = {
    "what", "which", "whom", "who", "when", "where", "why", "how", "how many",
    "how large", "how big"
}
bert_args = BertArgs(globals_args.root, globals_args.q_mode)
nltk_nlp = NLTK_NLP(globals_args.argument_parser.ip_port)
sutime = SUTime(jars=globals_args.argument_parser.sutime_jar_files,
                mark_time_ranges=True)
unimportantwords = hand_files.read_set(
    globals_args.argument_parser.unimportantwords)
unimportantphrases = hand_files.read_list(
    globals_args.argument_parser.unimportantphrases)
stopwords_dict = hand_files.read_set(
    globals_args.argument_parser.stopwords_dir)
ordinal_lines_dict = hand_files.read_ordinal_file(
    globals_args.argument_parser.ordinal_fengli
)  #2 {'second', '2ndis_equal_wh_word'}

count_phrases = [
    'Count', 'How many', 'how many', 'the number of', 'the count of',
    'the amount of', 'total number of', 'count'
]
count_ner_tags = ['count']
dayu_phrases = [
    'more', 'more than', 'greater', 'higher', 'longer than', 'taller than'
]  #'over',
dayu_dengyu_phrases = ['at least', 'not less than', 'or more']