예제 #1
0
    #准确率
    precision = float(same_num) / len(my_seg_node_list)
    #覆盖率
    recall = float(same_num) / len(stardard_seg_node_list)

    return (precision, recall)


# Script entry point: loads a segmenter dictionary from the first command-line
# argument and reads the test data file named by the second.
if __name__ == "__main__":
    # Exactly two arguments are required: the dict file and the test data file.
    if len(sys.argv) != 3:
        print "please input dict file and test data file"
        # NOTE(review): sys.exit() with no argument exits with status 0, so a
        # usage error is reported to the caller as success.
        sys.exit()

    # Trained dictionary
    dict_file_name = sys.argv[1]
    my_seg = e_segment.DNASegment()
    my_seg.initial_dict(dict_file_name)

    # Read the test file and run the test
    # NOTE(review): no matching close() is visible in this excerpt.
    test_file = open(sys.argv[2], "r")

    # Counters, all starting at zero. "precison_sum" keeps its original
    # spelling because it may be referenced by code outside this excerpt.
    precison_sum = 0
    recall_sum = 0
    line_num = 0
    seg_pos_precision_sum = 0
    seg_pos_recall_sum = 0
    # The whole file is read into memory as a list of lines.
    line_list = test_file.readlines()

    # Visit every third line, starting at index 0.
    # Presumably each test record spans three lines -- TODO confirm against
    # the test data format.
    for i in range(0, len(line_list), 3):
        # The string
        line = line_list[i].strip()
예제 #2
0
    precision = float(same_num) / len(my_seg_node_list)
    #覆盖率
    recall = float(same_num) / len(stardard_seg_node_list)

    return (precision, recall)


# Script entry point: builds a DNASegment segmenter from a fixed dictionary
# file and an MISegment segmenter from the dictionary given as the first
# command-line argument, then opens the test data file (second argument).
if __name__ == "__main__":
    # Exactly two arguments are required: the dict file and the test data file.
    if len(sys.argv) != 3:
        print "please input dict file and test data file"
        # NOTE(review): sys.exit() with no argument exits with status 0, so a
        # usage error is reported to the caller as success.
        sys.exit()

    # Build the segmenters
    # Standard dictionary (hard-coded relative path, resolved against the
    # current working directory)
    standard_dict = "count_1w.txt"
    # "stardard_seg" keeps its original spelling because it may be referenced
    # by code outside this excerpt.
    stardard_seg = e_segment.DNASegment()
    stardard_seg.initial_dict(standard_dict)

    # Trained dictionary
    dict_file_name = sys.argv[1]
    my_seg = mi_segment.MISegment()
    my_seg.load_dict(dict_file_name)

    # Read the test file and run the test
    # NOTE(review): no matching close() is visible in this excerpt.
    test_file = open(sys.argv[2], "r")
    # Counters, all starting at zero. "precison_sum" keeps its original
    # spelling for the same reason as above.
    precison_sum = 0
    recall_sum = 0
    line_num = 0
    seg_pos_precision_sum = 0
    seg_pos_recall_sum = 0