예제 #1
0
 def test_mix(self):
     """Check that the similarity of the original text and its "mix" variant is in [0, 1].

     Reads both fixture files as UTF-8, passes each through
     ``jc.dec_stopwords``, joins the resulting tokens with single spaces,
     and feeds the two strings to ``jc.sim_value``. The score is printed
     and asserted to lie within the closed interval [0, 1].
     """
     # Context managers guarantee the fixture files are closed even if
     # reading fails; the bare open(...).read() form leaked the handles.
     with open('sim_0.8/orig.txt', 'r', encoding='utf-8') as orig_file:
         doc1 = orig_file.read()
     with open('sim_0.8/orig_0.8_mix.txt', 'r', encoding='utf-8') as mix_file:
         doc2 = mix_file.read()

     # presumably dec_stopwords strips stop words and returns an iterable
     # of string tokens (it is joined below) -- verify against jc.
     doc1 = jc.dec_stopwords(doc1)
     doc2 = jc.dec_stopwords(doc2)
     doc_1 = " ".join(doc1)
     doc_2 = " ".join(doc2)

     xx = jc.sim_value(doc_1, doc_2)
     print(xx)
     self.assertGreaterEqual(xx, 0)
     self.assertLessEqual(xx, 1)
예제 #2
0
파일: main.py 프로젝트: fzu021800901/t1
import jc
import sys
# Read the two input paths and the output path from the command line.
try:
    file_path_0 = sys.argv[1]
    file_path_1 = sys.argv[2]
    out_put_path = sys.argv[3]
except IndexError:
    print("缺少参数!")
    # Nothing below can run without all three arguments; stop here instead
    # of falling through to a NameError on the undefined path variables.
    sys.exit(1)

# Test paths (kept for manual runs).
# NOTE(review): these lines pass file *contents* to jc.dec_stopwords, while
# the command-line values above are file *paths* -- confirm which form
# jc.dec_stopwords actually expects.
#file_path_0 = open('sim_0.8/orig.txt', 'r', encoding='utf-8').read()
#file_path_1 = open('sim_0.8/orig_0.8_rep.txt', 'r', encoding='utf-8').read()

# Remove stop words from both texts.
doc0 = jc.dec_stopwords(file_path_0)
doc1 = jc.dec_stopwords(file_path_1)

# Join the remaining tokens into space-separated strings.
doc_0 = " ".join(doc0)
doc_1 = " ".join(doc1)

# Compute the similarity score.
xx = jc.sim_value(doc_0, doc_1)
print(xx)

# Write the score, rounded to two decimals, to the output file.
try:
    # The context manager closes the file even if write() raises.
    with open(out_put_path, "w", encoding="utf-8") as output_file:
        output_file.write("%.2f" % (xx))
except OSError:
    # Only I/O failures are reported here; other exceptions (including
    # KeyboardInterrupt) are no longer swallowed.
    print("%s打开失败 " % (sys.argv[3]))
else:
    # Report success only when the result was actually written.
    print("OK")