Esempio n. 1
0
def test_tfidf():
    """Query a Tfidf model that was fitted without explicit ids.

    With no ids passed to ``fit``, the query result is compared against
    integer positions: for the query 'lazy' the first hit must be 1 and
    the second hit must be 0.
    """
    model = Tfidf()
    model.fit(documents)
    ranking = model.query('lazy')

    best_hit = ranking[0]
    assert best_hit == 1
    second_hit = ranking[1]
    assert second_hit == 0
Esempio n. 2
0
def test_tfidf():
    """Check the ranking of a Tfidf model that relies on auto-generated ids."""
    retriever = Tfidf()
    retriever.fit(documents)  # no ids supplied here

    hits = retriever.query('lazy')

    # Expected order for 'lazy': id 1 first, then id 0.
    assert hits[0] == 1
    assert hits[1] == 0
Esempio n. 3
0
def test_combined():
    """Query a Retrieval built from ``wcd + tfidf**2`` with custom labels.

    A WordCentroidDistance model and a Tfidf model are fitted on different
    inputs, combined with ``+`` and ``**``, and wrapped in a Retrieval that
    uses labels 7 and 42. The top hit for 'fox' must be 7 and the top hit
    for 'scientists' must be 42.
    """
    tokenized_docs = [doc.split() for doc in documents]
    # NOTE(review): `iter` is the pre-4.0 gensim spelling of this keyword
    # (later renamed `epochs`) - confirm against the pinned gensim version.
    embedding = Word2Vec(tokenized_docs, iter=1, min_count=1)
    centroid_model = WordCentroidDistance(embedding.wv)
    tfidf_model = Tfidf()

    centroid_model.fit(documents)
    # The two models are deliberately fitted on different inputs.
    tfidf_model.fit(['fox', 'scientists'])
    matcher = Matching().fit(documents)

    combination = centroid_model + tfidf_model**2

    engine = Retrieval(combination, matching=matcher, labels=[7, 42])
    for query, expected_label in (('fox', 7), ('scientists', 42)):
        ranking = engine.query(query)
        assert ranking[0] == expected_label
Esempio n. 4
0
def test_combined():
    """Verify the top label returned by a combined retrieval model.

    Builds ``wcd + tfidf ** 2`` from a WordCentroidDistance fitted on
    ``documents`` and a Tfidf fitted on ['fox', 'scientists'], then checks
    that each of those two queries returns its own label (7 and 42) first.
    """
    # NOTE(review): `iter` is the pre-4.0 gensim keyword (renamed `epochs`
    # in gensim 4) - verify which gensim version this suite targets.
    w2v = Word2Vec(
        [text.split() for text in documents],
        iter=1,
        min_count=1,
    )
    word_centroid = WordCentroidDistance(w2v.wv)
    term_weighting = Tfidf()

    # Each model may be fitted on its own field / input.
    word_centroid.fit(documents)
    term_weighting.fit(['fox', 'scientists'])
    matching_step = Matching().fit(documents)

    scorer = word_centroid + term_weighting ** 2
    searcher = Retrieval(scorer, matching=matching_step, labels=[7, 42])

    fox_hits = searcher.query('fox')
    assert fox_hits[0] == 7

    scientist_hits = searcher.query('scientists')
    assert scientist_hits[0] == 42
Esempio n. 5
0
def test_retrieval():
    """Query a Tfidf-backed Retrieval that was fitted with explicit ids.

    The ids passed to ``fit`` are what the query result is compared with:
    for 'fox' the first hit must be 'fox_example' and the second
    'lazy_example'.
    """
    engine = Retrieval(Tfidf())
    engine.fit(documents, ['fox_example', 'lazy_example'])

    ranking = engine.query('fox')

    assert ranking[0] == 'fox_example'
    assert ranking[1] == 'lazy_example'
Esempio n. 6
0
def test_expansion_inside_retrieval():
    """Run embedded query expansion as part of a full Retrieval pipeline.

    A Word2Vec model trained on DOCUMENTS supplies the vectors for an
    EmbeddedQueryExpansion (``m=2``), which is plugged into a Retrieval
    together with a Tfidf model and a Matching operation. The surf-themed
    query must return 'surf_ex' as its first hit.
    """
    n_expansions = 2
    sentences = [text.split() for text in DOCUMENTS]
    # NOTE(review): `iter` is the pre-4.0 gensim keyword (renamed `epochs`
    # in gensim 4) - confirm against the pinned gensim version.
    embedding = Word2Vec(sentences, iter=1, min_count=1)

    pipeline = Retrieval(
        Tfidf(),  # the retrieval model
        matching=Matching(),
        query_expansion=EmbeddedQueryExpansion(embedding.wv, m=n_expansions),
    )
    pipeline.fit(DOCUMENTS, ['fox_ex', 'surf_ex'])

    hits = pipeline.query('surfing surfers do surf green')
    assert hits[0] == 'surf_ex'