def test_inv_index_query(load_inverted_index, wiki_docs):
    """Query a pre-loaded index and a freshly built one.

    Document ids 25 and 290 must be among the results for
    ``['after', 'were']`` on both indexes, and the pre-loaded index must
    return an empty result for ``['neizvesnie', 'slova']``.
    """
    expected_ids = {25, 290}

    # Pre-loaded index, invoked through the class rather than the instance.
    hits = InvertedIndex.query(load_inverted_index, ['after', 'were'])
    assert expected_ids.issubset(hits)

    misses = InvertedIndex.query(load_inverted_index, ['neizvesnie', 'slova'])
    assert len(misses) == 0

    # Index built from the raw documents must give the same hits.
    rebuilt_index = InvertedIndex(build_inverted_index(wiki_docs))
    rebuilt_hits = rebuilt_index.query(['after', 'were'])
    assert expected_ids.issubset(rebuilt_hits)
def test_query_2_intersect_words(words=None):
    """Check that querying two words returns exactly the expected doc ids.

    Args:
        words: Words to query; defaults to ``['bow', 'tfidf']``, for which
            the expected result over ``TEST_INDEX_TABLE`` is ``{4}``.
    """
    if words is None:
        # Build a fresh list on every call instead of sharing one mutable
        # default object across calls.
        words = ['bow', 'tfidf']
    inv_idx = InvertedIndex(TEST_INDEX_TABLE)
    doc_ids = inv_idx.query(words)
    right_answer = {4}
    assert doc_ids == right_answer
def test_query2(mock_file):
    """Verify that a query for correct words, with repeats, succeeds."""
    index = InvertedIndex()
    index.build(mock_file)
    found = index.query(['string', 'long'])
    assert found == {1, 10, 100500}
def test_query_from_loaded2(mock_file):
    """Verify that querying non-existent words on a loaded index yields an empty set."""
    index = InvertedIndex()
    index.load('test.index')
    found = index.query(['me', 'test', 'non existed'])
    assert found == set()
    # The mocked file must have been used more than once.
    call_count = len(mock_file.mock_calls)
    assert call_count > 1
def test_query_one_doc_in_index():
    """Query two words on a hand-built index and expect the result ``{1}``."""
    postings = {
        'foo': {1, 2, 3},
        'bar': {1},
        'foobar': {1, 2},
    }
    index = InvertedIndex()
    index.inverted_index = defaultdict(set, postings)
    found = index.query(['foo', 'bar'])
    assert found == {1}, 'didnt find a doc, which present in index'
def test_query(mock_file):
    """Verify that a query for a correct word on a built index succeeds."""
    index = InvertedIndex()
    index.build(mock_file)
    found = index.query(['test'])
    assert found == {1}
def test_unicode_query_two_docs_in_index():
    """Query two non-ASCII words on a hand-built index and expect ``{1, 2}``."""
    postings = {
        'один': {1, 2, 3},
        'bar': {1},
        'два': {1, 2},
    }
    index = InvertedIndex()
    index.inverted_index = defaultdict(set, postings)
    found = index.query(['один', 'два'])
    assert found == {1, 2}, (
        'didnt find a two docs, which are present in index with unicode'
    )
def test_query_from_loaded(mock_file):
    """Verify that an index can be queried after it has been loaded."""
    index = InvertedIndex()
    index.load('test.index')
    found = index.query(['me', 'test'])
    assert found == {1}
    # The mocked file must have been used more than once.
    call_count = len(mock_file.mock_calls)
    assert call_count > 1
def test_unseen_word(word='fasttext'):
    """Expect ``query`` to return ``None`` for a single-word query.

    The default word is ``'fasttext'`` (presumably absent from
    ``TEST_INDEX_TABLE`` -- confirm against the table definition).
    """
    index = InvertedIndex(TEST_INDEX_TABLE)
    result = index.query([word])
    assert result is None
def test_query_2_words_without_shared_docs(words=None):
    """Check that a two-word query is expected to return ``None``.

    Args:
        words: Words to query; defaults to ``['bow', 'cbow']``, for which
            the expected result over ``TEST_INDEX_TABLE`` is ``None``.
    """
    if words is None:
        # Build a fresh list on every call instead of sharing one mutable
        # default object across calls.
        words = ['bow', 'cbow']
    inv_idx = InvertedIndex(TEST_INDEX_TABLE)
    doc_ids = inv_idx.query(words)
    right_answer = None
    assert doc_ids is right_answer
def test_query_single_word(word='bow'):
    """Expect a single-word query to equal that word's ``TEST_INDEX_TABLE`` entry."""
    index = InvertedIndex(TEST_INDEX_TABLE)
    result = index.query([word])
    assert result == TEST_INDEX_TABLE[word]
def test_query_not_in_index():
    """Query a word missing from a hand-built index and expect an empty set."""
    postings = {'foo': {1, 2, 3}, 'bar': {1}}
    index = InvertedIndex()
    index.inverted_index = defaultdict(set, postings)
    found = index.query(['foobar'])
    assert found == set(), 'find a doc, that not in index'