Exemple #1
0
def run():
    '''
    Run ``Test`` for every setting named on the command line, then prompt
    for a dataset / relscore / winsize selection.

    The available settings come from ``get_setting_dict()`` (per the
    original note, these are read from ./setting_files).  Each entry of
    ``sys.argv`` that is a key of that mapping selects one setting.  For a
    selected setting, ``data_dir_abs_path`` and ``ref_file_name`` are
    comma-separated lists that are walked in lockstep, and ``Test`` is
    invoked once per data directory.

    A truthy result from ``set_setting(setting)`` stops the scan of the
    remaining command-line arguments.

    Returns:
        The ``settings`` mapping, after its "dataset", "winsize" and
        "relscore" entries have been filled from interactive input.

    Raises:
        IndexError: if ``ref_file_name`` lists fewer entries than
            ``data_dir_abs_path``.
    '''
    setting_list = get_setting_dict()
    for arg in sys.argv:
        if arg not in setting_list:
            continue
        setting = setting_list[arg]
        if set_setting(setting):
            break
        # Split the comma-separated lists once per setting instead of
        # re-splitting both of them on every iteration.
        data_dir_abs_paths = split(',', str(setting['data_dir_abs_path']))
        ref_file_names = split(',', str(setting['ref_file_name']))
        for i, data_dir_abs_path in enumerate(data_dir_abs_paths):
            # Indexed on purpose (not zip): a missing reference file name
            # must still fail loudly with IndexError rather than silently
            # skipping the data directory.
            ref_file_name = ref_file_names[i]
            text_dict = get_text_dict(data_dir_abs_path)
            # The data set is named after the last path component.
            data_set_name = split('/', data_dir_abs_path)[-1]
            # Same bytes as `print "data_set_name : ", data_set_name`
            # (the print statement inserts one space between items), but
            # written so that it also parses as a function call.
            print(" ".join(("data_set_name : ", data_set_name)))
            Test(text_dict,
                 data_set_name,
                 ref_file_name,
                 to_bool(setting['csv_head_flag']),
                 int(setting['tf_ver']),
                 int(setting['idf_ver']),
                 to_bool(setting['crank_flag']),
                 to_bool(setting['single_rank_flag']),
                 to_bool(setting['expand_rank_flag']),
                 to_bool(setting['topical_page_rank_flag']),
                 str(setting['relscore_type']),
                 int(setting['winsize_lb']),
                 int(setting['winsize_ub']),
                 int(setting['cutoff_lb']),
                 int(setting['cutoff_ub']),
                 float(setting['lamb_lb']),
                 float(setting['lamb_ub']),
                 float(setting['k1_lb']),
                 float(setting['k1_ub']),
                 float(setting['b_lb']),
                 float(setting['b_ub']))
    # NOTE(review): `settings` is never assigned inside this function, so it
    # has to resolve to a module-level name or this raises NameError.  These
    # prompt lines look like they may belong to a separate settings-reading
    # function -- confirm against the full file before relying on them.
    settings["dataset"] = raw_input("dataset name : ")
    relscore_list = raw_input("relscore_type(ex. tf_1_1, bm25_3_2_1.2_0.75) : ")
    winsize_list = raw_input("winsize : ")
    # Raw strings: "\s" only worked because the unknown escape was kept
    # verbatim; the pattern itself is unchanged.
    settings["winsize"] = re.split(r"\s", winsize_list)
    settings["relscore"] = re.split(r"\s", relscore_list)
    return settings


# ref_file_path = '/Users/KimKR/Desktop/NEXT_LAB/keyword/gold_standard/inspec.ref'
# GsFunctionalTest(ref_file_path)


if __name__ == "__main__":
    # Script entry point: obtain the run settings, load the chosen data set,
    # then run the functional checks below.
    settings = get_setting()
    data_set_name = settings["dataset"]
    # NOTE(review): the data set is looked up under a hard-coded absolute
    # path; the entered dataset name is appended as the last component.
    text_dict = get_text_dict("/Users/KimKR/Desktop/NEXT_LAB/keyword/%s" % (data_set_name))
    # check equality of two methods of getting relevance score
    for relscore_type in settings["relscore"]:
        # A relscore_type is "_"-separated (e.g. "bm25_3_2_1.2_0.75"):
        # field 0 is passed through as-is, fields 1 and 2 are converted to
        # int, and optional fields 3 and 4 override k1 and b.
        relscore = re.split("_", relscore_type)
        # Defaults used when the type string carries fewer than five fields.
        k1 = 1.2
        b = 0.75
        if len(relscore) > 4:
            # Still strings at this point; converted with float() below.
            k1 = relscore[3]
            b = relscore[4]
        # NOTE(review): ft1 is not read anywhere in the visible code, so the
        # check presumably happens inside the constructor -- confirm.
        ft1 = RelscoreFunctionalTest(
            text_dict, data_set_name, relscore[0], int(relscore[1]), int(relscore[2]), float(k1), float(b)
        )
    # check equality of two methods of getting keyphrase_candidate
    for relscore_type in settings["relscore"]:
        for winsize in settings["winsize"]:
            # Every (relscore_type, winsize) combination is visited; the
            # visible part of the loop body only reports the current pair.
            print "relscore_type : %s, winsize : %s" % (relscore_type, winsize)