def test_tfidf():
    """Check tf-idf retrieval when no explicit document ids are supplied."""
    model = Tfidf()
    model.fit(documents)

    ranking = model.query('lazy')

    # With auto-generated ids the expected order is id 1 first, then id 0.
    assert ranking[0] == 1
    assert ranking[1] == 0
def test_combined():
    """Check retrieval with a combination of word centroid distance and tf-idf.

    NOTE(review): a second function with this same name is defined later in
    the file and shadows this one at import time -- confirm which is intended.
    """
    tokenized = [text.split() for text in documents]
    embedding = Word2Vec(tokenized, iter=1, min_count=1)

    wcd = WordCentroidDistance(embedding.wv)
    tfidf = Tfidf()
    wcd.fit(documents)
    # The two models can operate on different fields.
    tfidf.fit(['fox', 'scientists'])

    match_op = Matching().fit(documents)
    combined = wcd + tfidf**2
    retrieval = Retrieval(combined, matching=match_op, labels=[7, 42])

    # Each query is expected to rank its matching label first.
    for query, expected_label in (('fox', 7), ('scientists', 42)):
        result = retrieval.query(query)
        assert result[0] == expected_label
def test_combined():
    """Check that a combined wcd + tfidf**2 model returns the given labels.

    NOTE(review): a function with this same name is also defined earlier in
    the file; this later definition is the one that stays bound to the name.
    """
    sentences = [entry.split() for entry in documents]
    w2v = Word2Vec(sentences, iter=1, min_count=1)
    centroid_model = WordCentroidDistance(w2v.wv)
    tfidf_model = Tfidf()

    centroid_model.fit(documents)
    # The two models can operate on different fields.
    tfidf_model.fit(['fox', 'scientists'])

    matcher = Matching().fit(documents)
    scorer = centroid_model + tfidf_model ** 2
    labels = [7, 42]
    retrieval = Retrieval(scorer, matching=matcher, labels=labels)

    fox_hits = retrieval.query('fox')
    assert fox_hits[0] == 7

    scientist_hits = retrieval.query('scientists')
    assert scientist_hits[0] == 42
def test_retrieval():
    """Check that Retrieval reports the caller-supplied document ids."""
    doc_ids = ['fox_example', 'lazy_example']

    engine = Retrieval(Tfidf())
    engine.fit(documents, doc_ids)

    hits = engine.query('fox')

    # Results are expressed in the given ids, best match first.
    assert hits[0] == 'fox_example'
    assert hits[1] == 'lazy_example'
def test_expansion_inside_retrieval():
    """Integration test: embedded query expansion inside a full Retrieval."""
    corpus_tokens = [text.split() for text in DOCUMENTS]
    embedding = Word2Vec(corpus_tokens, iter=1, min_count=1)

    n_expansions = 2
    expander = EmbeddedQueryExpansion(embedding.wv, m=n_expansions)
    matcher = Matching()
    scorer = Tfidf()

    pipeline = Retrieval(
        scorer,  # The retrieval model
        matching=matcher,
        query_expansion=expander,
    )

    doc_ids = ['fox_ex', 'surf_ex']
    pipeline.fit(DOCUMENTS, doc_ids)

    hits = pipeline.query('surfing surfers do surf green')
    assert hits[0] == 'surf_ex'