Python Tfidf Examples

Programming Language: Python

Namespace/Package Name: ir.base

Class/Type: Tfidf

Examples at hotexamples.com: 8

Python Tfidf - 8 examples found. These are the top-rated real-world Python examples of ir.base.Tfidf extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Tfidf(8)

fit(5)

query(3)

Frequently Used Methods

Tfidf (8)

fit (5)

query (3)

Example #1

Show file

File: test_similar_ir.py Project: tobby2002/vec4ir

def test_tfidf():
    """Fit a Tfidf model on DOCUMENTS and print the scored hits for one query.

    The query mixes a Korean and an English token; nothing is asserted, the
    ranking and its scores are only printed for inspection.
    """
    query_text = '안녕 scientists'
    # No ids are passed to fit(), so retrieval relies on auto-generated ids.
    model = Tfidf()
    model.fit(DOCUMENTS)
    hits, hit_scores = model.query(query_text, return_scores=True)
    print(hits, hit_scores)
# test_tfidf()

Example #2

Show file

def test_tfidf():
    """Check that Tfidf ranks the document containing the query term first.

    The documents are fitted without explicit ids, so the results are compared
    against the positional indices of the fitted documents.
    """
    DOCUMENTS = ["The quick brown fox jumps over the lazy dog",
                 "Surfing surfers do surf on green waves"]
    # Test tfidf retrieval with auto-generated ids
    tfidf = Tfidf()
    tfidf.fit(DOCUMENTS)
    result = tfidf.query('dog')
    print(result)
    # 'dog' occurs only in document 0, so that document must outrank
    # document 1 (the previous expectations were inverted).
    # NOTE(review): assumes query() returns document indices ordered by
    # descending relevance - confirm against the Tfidf implementation.
    assert result[0] == 0
    assert result[1] == 1

Example #3

Show file

def test_expansion_inside_retrieval():
    """Run embedding-based query expansion inside a full Retrieval pipeline.

    Builds a Word2Vec model from the whitespace-split DOCUMENTS, wires a
    Tfidf retrieval model, a Matching operator and an EmbeddedQueryExpansion
    into a Retrieval, fits it on DOCUMENTS, and then prints the results of
    two queries together with the elapsed time. Nothing is asserted.
    """
    # Integration test within full retrieval pipeline
    model = Word2Vec([doc.split() for doc in DOCUMENTS], iter=1, min_count=1)
    n_expansions = 2
    tfidf = Tfidf()
    match_op = Matching()
    expansion_op = EmbeddedQueryExpansion(model.wv, m=n_expansions)

    retrieval = Retrieval(
        tfidf,  # The retrieval model
        matching=match_op,
        query_expansion=expansion_op)
    retrieval.fit(DOCUMENTS)

    # Use a monotonic, high-resolution clock: time.time() can jump when the
    # system clock is adjusted, which would distort the measured duration.
    start = time.perf_counter()  # remember the start time
    result = retrieval.query("An 81-year-old woman named Eileen")
    print(result)

    result, score = retrieval.query("한국에서 가장 좋은 나라", return_scores=True)

    # elapsed = current time - start time
    print("time :", time.perf_counter() - start)
    print('result:%s' % result)
    print('score:%s' % score)

Example #4

Show file

def test_retrieval():
    """Fit a Tfidf-backed Retrieval with explicit ids and print the hits for 'fox'."""
    corpus = [
        "The quick brown fox jumps over the lazy dog",
        "Surfing surfers do surf on green waves",
    ]
    # Caller-supplied ids, one per document, instead of auto-generated ones.
    doc_ids = ['fox_example', 'lazy_example']

    ranker = Tfidf()
    engine = Retrieval(ranker)
    engine.fit(corpus, doc_ids)

    hits = engine.query('fox')
    print(hits)

Example #5

Show file

def test_combined():
    """Check retrieval with a combined WordCentroidDistance + Tfidf**2 model.

    The combined model is passed to a Retrieval together with a Matching
    operator fitted on ``documents`` and the labels ``[7, 42]``. The query
    'fox' is expected to return label 7 first and the query 'scientists'
    label 42 first.
    """
    model = Word2Vec([doc.split() for doc in documents], iter=1, min_count=1)
    wcd = WordCentroidDistance(model.wv)
    tfidf = Tfidf()

    wcd.fit(documents)
    # The two models can operate on different fields
    tfidf.fit(['fox', 'scientists'])
    match_op = Matching().fit(documents)

    combined = wcd + tfidf ** 2

    retrieval = Retrieval(combined, matching=match_op, labels=[7, 42])

    # Assert on the 'fox' result before it is overwritten: previously the
    # 'scientists' query replaced it, so the same query was asserted to
    # return both 7 and 42 as its top hit.
    result, score = retrieval.query('fox', return_scores=True)
    print(result, score)
    assert result[0] == 7

    result, score = retrieval.query('scientists', return_scores=True)
    print(result, score)
    assert result[0] == 42

    # The score-less call path must produce the same top hit.
    result = retrieval.query('scientists')
    assert result[0] == 42

Example #6

Show file

File: test_query_expansion.py Project: tobby2002/vec4ir

def test_expansion_inside_retrieval():
    """Smoke-test query expansion as part of the full retrieval pipeline.

    A Word2Vec model built from the whitespace-split DOCUMENTS provides the
    word vectors for an EmbeddedQueryExpansion (m=2). The pipeline is fitted
    on DOCUMENTS without explicit ids, and the hits for 'art news' are printed.
    """
    tokenized_docs = [doc.split() for doc in DOCUMENTS]
    embedding = Word2Vec(tokenized_docs, iter=1, min_count=1)
    expansion_count = 2

    ranker = Tfidf()
    matcher = Matching()
    expander = EmbeddedQueryExpansion(embedding.wv, m=expansion_count)

    # The first positional argument is the retrieval model.
    pipeline = Retrieval(ranker, matching=matcher, query_expansion=expander)
    pipeline.fit(DOCUMENTS)

    hits = pipeline.query('art news')
    print(hits)

Example #7

Show file

def test_tfidf():
    """Fit a Tfidf model on ``documents`` and print the scored hits for 'article'."""
    # fit() receives no ids, so retrieval relies on auto-generated ids.
    ranker = Tfidf()
    ranker.fit(documents)
    hits, hit_scores = ranker.query('article', return_scores=True)
    print(hits, hit_scores)

Example #8

Show file

File: fasttext_proposeword_test.py Project: tobby2002/vec4ir

# Build a retrieval pipeline over the vocabulary of an already-loaded model:
# every vocabulary entry is treated as its own "document" and as its own label.
# NOTE(review): load_ft_model is defined outside this excerpt; presumably a
# fastText model, judging by the file name - confirm.
vvoca_docs_d = load_ft_model.wv.vocab
# Materialise the vocabulary keys as a list so they can be fitted and printed.
vvoc_l = list(vvoca_docs_d.keys())
print('===== start ==== copus vocas ==========')
print('vvoc_l:%s' % vvoc_l)
print('===== end ==== copus vocas ==========')
# Pre-processed query; only used by the commented-out query code further down.
# NOTE(review): jamo_sentence is defined elsewhere - its output format is not
# visible here.
q = jamo_sentence('후대폰 하니님 kt')

# Disabled alternative: retrieval with WordCentroidDistance (wcd) only.
# match_op = Matching()
# wcd = WordCentroidDistance(load_ft_model.wv)
# vvoc_retrieval = Retrieval(wcd, matching=match_op, labels=vvoc_l)
# vvoc_retrieval.fit(vvoc_l)

# Active variant: combination of WordCentroidDistance and squared Tfidf.
tfidf = Tfidf()
tfidf.fit(vvoc_l)

wcd = WordCentroidDistance(load_ft_model.wv)
wcd.fit(vvoc_l)
# The two models can operate on different fields; here both are fitted on
# the same vocabulary list.
match_op = Matching().fit(vvoc_l)
combined = wcd + tfidf**2
# The vocabulary entries double as the labels of the retrieval results.
vvoc_retrieval = Retrieval(combined, matching=match_op, labels=vvoc_l)
# print('========= voca 검색어 ==========')
# vocas, score = vvoc_retrieval.query(q, return_scores=True)
# print('vocas, score')
# print(vocas, score)
#
# print('========= docu 검색어 ==========')