Ejemplo n.º 1
0
def test_1_gram(in_path, out_path):
    """Run ``segmentation_mr`` on ``in_path``, time it, and evaluate the output.

    Steps, in order:
      1. Load ``lfreq`` and ``ltotal`` with ``build_pfdict(dic_path)``.
      2. Call ``segmentation_mr(in_path, seg, lfreq, ltotal)`` and print how
         long that call took, in whole milliseconds.
      3. Write ``seg`` to ``out_path`` with ``output_2_file``.
      4. Call ``analysis(out_path, gold_std_path)``.

    Args:
        in_path: Forwarded to ``segmentation_mr`` as its input argument.
        out_path: Forwarded to ``output_2_file`` and then to ``analysis``.
    """
    lfreq, ltotal = build_pfdict(dic_path)
    # Presumably filled in place by segmentation_mr -- verify against callee.
    seg = []
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_mr(in_path, seg, lfreq, ltotal)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')
    output_2_file(seg, out_path)
    # Word segmentation evaluation
    analysis(out_path, gold_std_path)
Ejemplo n.º 2
0
def test_2_gram(in_path, out_path):
    """Run ``segmentation_graph`` on ``in_path``, time it, and evaluate the output.

    Steps, in order:
      1. Load ``lfreq`` with ``build_pfdict(dic_path)`` and ``pfreq`` with
         ``pro_prefix_dictionary()``.
      2. Call ``segmentation_graph(in_path, seg, lfreq, pfreq)`` and print how
         long that call took, in whole milliseconds.
      3. Write ``seg`` to ``out_path`` with ``output_2_file``.
      4. Call ``analysis(out_path, gold_std_path)``.

    Args:
        in_path: Forwarded to ``segmentation_graph`` as its input argument.
        out_path: Forwarded to ``output_2_file`` and then to ``analysis``.
    """
    # The second value returned by build_pfdict is not used here.
    lfreq, _ = build_pfdict(dic_path)
    pfreq = pro_prefix_dictionary()
    # Presumably filled in place by segmentation_graph -- verify against callee.
    seg = []
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_graph(in_path, seg, lfreq, pfreq)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')
    output_2_file(seg, out_path)
    # Word segmentation evaluation
    analysis(out_path, gold_std_path)
Ejemplo n.º 3
0
def main():
    """Run ``segmentation_bigram`` on ``sent_path``, time it, and evaluate it.

    Loads ``lfreq`` with ``build_pfdict(dic_path)`` and ``pfreq`` with
    ``pro_prefix_dictionary()``, calls ``segmentation_bigram`` and prints how
    long that call took in whole milliseconds, writes ``seg`` to
    ``seg_LM_bg_path`` with ``output_2_file``, and finally calls
    ``analysis(seg_LM_bg_path, gold_std_path)``.
    """
    # The second value returned by build_pfdict is not used here.
    lfreq, _ = build_pfdict(dic_path)
    pfreq = pro_prefix_dictionary()

    # Presumably filled in place by segmentation_bigram -- verify against callee.
    seg = []
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_bigram(sent_path, seg, lfreq, pfreq)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')
    output_2_file(seg, seg_LM_bg_path)

    analysis(seg_LM_bg_path, gold_std_path)
Ejemplo n.º 4
0
def main():
    """Segment with ``forward_maximum_matching`` and save the result.

    Calls ``forward_maximum_matching(dic_path, sent_path)`` and passes its
    return value to ``output_2_file`` together with ``seg_FMM_path``.
    """
    segmented = forward_maximum_matching(dic_path, sent_path)
    output_2_file(segmented, seg_FMM_path)