def test_tfidf():
    """Smoke-test Tfidf retrieval with auto-generated ids.

    Fits a Tfidf model on the module-level DOCUMENTS, runs a mixed
    Korean/English query with scores enabled and prints the outcome.
    Nothing is asserted; the test only checks that the calls succeed.
    """
    # NOTE(review): other functions named test_tfidf appear later in this
    # file; if they live in the same module they shadow this definition.
    retriever = Tfidf()
    retriever.fit(DOCUMENTS)
    ranked_ids, ranked_scores = retriever.query(
        '안녕 scientists',
        return_scores=True,
    )
    print(ranked_ids, ranked_scores)
# test_tfidf()
def test_tfidf():
    """Check the ranking Tfidf returns for a two-document corpus.

    Uses auto-generated ids: the model is fitted on a local corpus, queried
    for 'dog', and the first two returned ids are asserted.
    """
    corpus = [
        "The quick brown fox jumps over the lazy dog",
        "Surfing surfers do surf on green waves",
    ]
    retriever = Tfidf()
    retriever.fit(corpus)

    ranking = retriever.query('dog')
    print(ranking)

    # NOTE(review): 'dog' occurs only in the first document (index 0), yet
    # index 1 is expected to rank first -- confirm against Tfidf.query's
    # ordering before relying on these assertions.
    assert ranking[0] == 1
    assert ranking[1] == 0
def test_combined():
    """Check retrieval with a combined WordCentroidDistance + Tfidf**2 model.

    A Word2Vec model is trained on the module-level ``documents``; the word
    centroid model is fitted on those documents while the Tfidf model is
    fitted on the two single-word "documents" 'fox' and 'scientists'.
    The combined model is wrapped in a Retrieval with labels [7, 42], so
    'fox' is expected to return label 7 first and 'scientists' label 42.

    Previously the 'fox' result was overwritten by a 'scientists' query
    before being asserted, so the same query was expected to return both 7
    and 42 first. Each query is now asserted directly after it is issued.
    """
    model = Word2Vec([doc.split() for doc in documents], iter=1, min_count=1)
    wcd = WordCentroidDistance(model.wv)
    tfidf = Tfidf()

    wcd.fit(documents)
    # The two models can operate on different fields.
    tfidf.fit(['fox', 'scientists'])
    match_op = Matching().fit(documents)

    combined = wcd + tfidf ** 2
    retrieval = Retrieval(combined, matching=match_op, labels=[7, 42])

    # Query for the first label and assert before the result is reused.
    result, score = retrieval.query('fox', return_scores=True)
    print(result, score)
    assert result[0] == 7

    # Query for the second label, with scores ...
    result, score = retrieval.query('scientists', return_scores=True)
    print(result, score)
    assert result[0] == 42

    # ... and without scores, which returns the ranking alone.
    result = retrieval.query('scientists')
    assert result[0] == 42
def test_tfidf():
    """Smoke-test Tfidf retrieval with auto-generated ids.

    Fits a Tfidf model on the module-level ``documents``, queries it for
    'article' with scores enabled and prints what comes back. Nothing is
    asserted.
    """
    retriever = Tfidf()
    retriever.fit(documents)
    ranked_ids, ranked_scores = retriever.query(
        'article',
        return_scores=True,
    )
    print(ranked_ids, ranked_scores)
# Collect the vocabulary of the loaded model and dump it for inspection.
vvoca_docs_d = load_ft_model.wv.vocab
vvoc_l = list(vvoca_docs_d.keys())
print('===== start ==== copus vocas ==========')
print('vvoc_l:%s' % vvoc_l)
print('===== end ==== copus vocas ==========')

# Query string passed through jamo_sentence; it is only consumed by the
# disabled query code further down.
q = jamo_sentence('후대폰 하니님 kt')

# Disabled alternative: retrieval based on word centroid distance alone.
# match_op = Matching()
# wcd = WordCentroidDistance(load_ft_model.wv)
# vvoc_retrieval = Retrieval(wcd, matching=match_op, labels=vvoc_l)
# vvoc_retrieval.fit(vvoc_l)

# Active set-up: combine word centroid distance with squared tf-idf, both
# fitted on the vocabulary list, and use the vocabulary entries as labels.
wcd = WordCentroidDistance(load_ft_model.wv)
tfidf = Tfidf()
tfidf.fit(vvoc_l)
wcd.fit(vvoc_l)
# The two models can operate on different fields.
match_op = Matching().fit(vvoc_l)
combined = wcd + tfidf**2
vvoc_retrieval = Retrieval(
    combined,
    matching=match_op,
    labels=vvoc_l,
)

# Disabled: run the vocabulary query and print the hits with their scores.
# print('========= voca 검색어 ==========')
# vocas, score = vvoc_retrieval.query(q, return_scores=True)
# print('vocas, score')
# print(vocas, score)
#
# Disabled: the same idea applied to whole documents.
# print('========= docu 검색어 ==========')
# jamo_document = list(map(lambda x: jamo_sentence(x), document))