Beispiel #1
0
def test_tfidf():
    """Smoke-test Tfidf retrieval over DOCUMENTS with auto-generated ids.

    Fits the model, runs a single mixed Korean/English query with scores
    requested, and prints what comes back. Nothing is asserted.
    """
    model = Tfidf()
    model.fit(DOCUMENTS)
    ranking, scores = model.query('안녕 scientists', return_scores=True)
    print(ranking, scores)
# test_tfidf()
Beispiel #2
0
def test_tfidf():
    """Check that Tfidf ranks the document containing the query term first.

    The model is fitted with auto-generated ids; the assertions below treat
    the returned values as positions in the local ``DOCUMENTS`` list.
    """
    DOCUMENTS = ["The quick brown fox jumps over the lazy dog",
                 "Surfing surfers do surf on green waves"]
    # Test tfidf retrieval with auto-generated ids
    tfidf = Tfidf()
    tfidf.fit(DOCUMENTS)
    result = tfidf.query('dog')
    print(result)
    # 'dog' occurs only in the first document (index 0), so that document
    # must be ranked ahead of the non-matching second one (index 1).
    assert result[0] == 0
    assert result[1] == 1
Beispiel #3
0
def test_combined():
    """Check retrieval with a combined WordCentroidDistance + Tfidf**2 model.

    Two labels (7 and 42) are attached to the two entries of ``documents``.
    Each query result is asserted immediately after the query that produced
    it, so a later query can no longer overwrite a result before it is
    checked.
    """
    # NOTE(review): `iter` is the training-iterations keyword of older gensim
    # releases (renamed `epochs` in gensim 4) - confirm the pinned version.
    model = Word2Vec([doc.split() for doc in documents], iter=1, min_count=1)
    wcd = WordCentroidDistance(model.wv)
    tfidf = Tfidf()

    wcd.fit(documents)
    # The two models can operate on different fields
    tfidf.fit(['fox', 'scientists'])
    match_op = Matching().fit(documents)

    combined = wcd + tfidf ** 2

    retrieval = Retrieval(combined, matching=match_op, labels=[7, 42])

    # Query for 'fox': the best hit is expected to carry label 7.
    result, score = retrieval.query('fox', return_scores=True)
    print(result, score)
    assert result[0] == 7

    # Query for 'scientists': the best hit is expected to carry label 42,
    # both when scores are requested and when they are not.
    result, score = retrieval.query('scientists', return_scores=True)
    print(result, score)
    assert result[0] == 42

    result = retrieval.query('scientists')
    assert result[0] == 42
Beispiel #4
0
def test_tfidf():
    """Smoke-test Tfidf retrieval over ``documents`` with auto-generated ids.

    Fits the model, queries for 'article' with scores requested, and prints
    the returned pair. Nothing is asserted.
    """
    model = Tfidf()
    model.fit(documents)
    ranking, scores = model.query('article', return_scores=True)
    print(ranking, scores)
# Collect the vocabulary keys of the loaded model. The resulting list is used
# below both as the texts the models are fitted on and as the retrieval labels.
# NOTE(review): `load_ft_model` is defined outside this section - presumably a
# loaded gensim/fastText model whose `.wv.vocab` is a dict keyed by word; confirm.
vvoca_docs_d = load_ft_model.wv.vocab
vvoc_l = list(vvoca_docs_d.keys())
print('===== start ==== copus vocas ==========')
print('vvoc_l:%s' % vvoc_l)
print('===== end ==== copus vocas ==========')
# Query string passed through `jamo_sentence` (defined elsewhere; presumably a
# Hangul jamo decomposition - confirm). In the visible code it is referenced
# only by the commented-out query further down.
q = jamo_sentence('후대폰 하니님 kt')

# Alternative setup kept for reference: retrieval with WordCentroidDistance only.
# match_op = Matching()
# wcd = WordCentroidDistance(load_ft_model.wv)
# vvoc_retrieval = Retrieval(wcd, matching=match_op, labels=vvoc_l)
# vvoc_retrieval.fit(vvoc_l)

# Active setup: combination of Tfidf and WordCentroidDistance.
tfidf = Tfidf()
tfidf.fit(vvoc_l)

wcd = WordCentroidDistance(load_ft_model.wv)
wcd.fit(vvoc_l)
# The two models are fitted separately; they can operate on different fields.
match_op = Matching().fit(vvoc_l)
# Combined model: wcd plus tfidf squared.
combined = wcd + tfidf**2
# NOTE(review): unlike the commented-out variant above, `fit` is not called on
# the Retrieval object here; only its component models are fitted.
vvoc_retrieval = Retrieval(combined, matching=match_op, labels=vvoc_l)

# print('========= voca 검색어 ==========')
# vocas, score = vvoc_retrieval.query(q, return_scores=True)
# print('vocas, score')
# print(vocas, score)
#
# print('========= docu 검색어 ==========')
# jamo_document = list(map(lambda x: jamo_sentence(x), document))