def test_1_gram(in_path, out_path):
    """Segment ``in_path`` with ``segmentation_mr``, then save and score it.

    The dictionary is built from the module-level ``dic_path``. Only the
    ``segmentation_mr`` call is timed; the elapsed time is printed in
    milliseconds. The collected segments are written with
    ``output_2_file`` and then compared against the module-level
    ``gold_std_path`` by ``analysis``.

    Args:
        in_path: Input handed to ``segmentation_mr`` (presumably the path
            of the text to segment -- confirm against that function).
        out_path: Destination handed to ``output_2_file`` and then read
            back by ``analysis``.
    """
    lfreq, ltotal = build_pfdict(dic_path)

    # segmentation_mr receives this list and is expected to fill it in place.
    seg = []

    # perf_counter is monotonic and high resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_mr(in_path, seg, lfreq, ltotal)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')

    output_2_file(seg, out_path)

    # Evaluate the segmentation against the gold standard.
    analysis(out_path, gold_std_path)
def test_2_gram(in_path, out_path):
    """Segment ``in_path`` with ``segmentation_graph``, then save and score it.

    The word-frequency dictionary is built from the module-level
    ``dic_path`` and a second table is obtained from
    ``pro_prefix_dictionary()``. Only the ``segmentation_graph`` call is
    timed; the elapsed time is printed in milliseconds. The collected
    segments are written with ``output_2_file`` and then compared against
    the module-level ``gold_std_path`` by ``analysis``.

    Args:
        in_path: Input handed to ``segmentation_graph`` (presumably the
            path of the text to segment -- confirm against that function).
        out_path: Destination handed to ``output_2_file`` and then read
            back by ``analysis``.
    """
    # The second value returned by build_pfdict is not used here.
    lfreq, _ = build_pfdict(dic_path)
    pfreq = pro_prefix_dictionary()

    # segmentation_graph receives this list and is expected to fill it in place.
    seg = []

    # perf_counter is monotonic and high resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_graph(in_path, seg, lfreq, pfreq)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')

    output_2_file(seg, out_path)

    # Evaluate the segmentation against the gold standard.
    analysis(out_path, gold_std_path)
def main():
    """Run ``segmentation_bigram`` on ``sent_path``, then save and score it.

    The word-frequency dictionary is built from the module-level
    ``dic_path`` and a second table is obtained from
    ``pro_prefix_dictionary()``. Only the ``segmentation_bigram`` call is
    timed; the elapsed time is printed in milliseconds. The collected
    segments are written to ``seg_LM_bg_path`` with ``output_2_file`` and
    then compared against ``gold_std_path`` by ``analysis``.
    """
    # The second value returned by build_pfdict is not used here.
    lfreq, _ = build_pfdict(dic_path)
    pfreq = pro_prefix_dictionary()

    # segmentation_bigram receives this list and is expected to fill it in place.
    seg = []

    # perf_counter is monotonic and high resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    segmentation_bigram(sent_path, seg, lfreq, pfreq)
    end_time = time.perf_counter()
    print(round((end_time - start_time) * 1000), 'ms')

    output_2_file(seg, seg_LM_bg_path)
    analysis(seg_LM_bg_path, gold_std_path)
def main():
    """Run ``forward_maximum_matching`` and write its result to ``seg_FMM_path``.

    ``forward_maximum_matching`` is called with the module-level
    ``dic_path`` and ``sent_path``; whatever it returns is passed straight
    to ``output_2_file``.
    """
    output_2_file(
        forward_maximum_matching(dic_path, sent_path),
        seg_FMM_path,
    )