def test_build_index2(mock_file):
    """Build an index from input containing a repeated word.

    The ``mock_file`` fixture supplies the text (its content is defined
    outside this test); the resulting ``index_data`` must equal the
    expected word -> document-id-list mapping.
    """
    expected_index = {
        'test': [1],
        'me': [1],
        'first': [1],
    }

    inverted = InvertedIndex()
    inverted.build(mock_file)

    assert inverted.index_data == expected_index
def test_many_queries_not_in_one_article(query):
    """Verify the single-article id is absent from results for ``query``.

    The index is built from the file at ``ONE_ARTICLE_PATH``; ``query`` is
    supplied from outside this test (presumably parametrized -- the
    decorator is not visible here).
    """
    with open(ONE_ARTICLE_PATH, 'r') as article_file:
        inverted = InvertedIndex()
        inverted.build(article_file)

    unexpected_id = '12'
    found = inverted.find_articles(query)
    assert unexpected_id not in found, 'find article in query that not in one article'
def test_build_index(mock_file):
    """Build an index from a simple string.

    The text comes from the ``mock_file`` fixture (defined outside this
    test); ``index_data`` is compared against the expected mapping.
    """
    inverted = InvertedIndex()
    inverted.build(mock_file)

    expected_index = dict(test=[1], me=[1], first=[1])
    assert inverted.index_data == expected_index
def test_query2(mock_file):
    """Query several known words and compare the returned document ids.

    The index is built from the ``mock_file`` fixture; querying
    ``'string'`` and ``'long'`` together must yield exactly the ids
    1, 10 and 100500.
    """
    inverted = InvertedIndex()
    inverted.build(mock_file)

    search_words = ['string', 'long']
    matched_ids = inverted.query(search_words)

    assert matched_ids == {1, 10, 100500}
def main():
    """Index every entry of the corpus directory and start the query loop.

    Changes the process working directory to ``RELATIVE_PATH_TO_CORPUS``
    (a process-wide side effect), lists that directory, builds an
    ``InvertedIndex`` over the listed names and hands it to
    ``QueryHandler.loop()``.
    """
    os.chdir(RELATIVE_PATH_TO_CORPUS)

    # The listing is taken from the directory we just switched into.
    corpus_dir = os.getcwd()
    corpus_entries = os.listdir(corpus_dir)

    corpus_index = InvertedIndex(corpus_entries)
    corpus_index.build()

    handler = QueryHandler(corpus_index)
    handler.loop()
def test_build_index4(mock_file):
    """Build an index from multi-line input and check its size.

    The ``mock_file`` fixture provides the text; the resulting index is
    expected to hold 12 entries.
    """
    inverted = InvertedIndex()
    inverted.build(mock_file)

    expected_size = 12
    # Both the mapping itself and its values view must report the same size.
    assert len(inverted.index_data) == expected_size
    assert len(inverted.index_data.values()) == expected_size
def test_one_article():
    """Query all words of the single article and expect its id back.

    The index is built from ``ONE_ARTICLE_PATH``; the same file is then
    re-read to split its first line into the leading article id and the
    remaining words, which are used as the query.
    """
    inverted = InvertedIndex()
    with open(ONE_ARTICLE_PATH, 'r') as article_file:
        inverted.build(article_file)

    with open(ONE_ARTICLE_PATH, 'r') as article_file:
        first_line = article_file.readline()

    # First whitespace-separated token is the id; the rest is the text.
    article_id, article_text = first_line.split(maxsplit=1)
    query_words = article_text.split()

    # NOTE(review): this compares the id string for equality with whatever
    # find_articles() returns, while another test in this file uses a
    # membership check on that result -- confirm the return type.
    assert article_id == inverted.find_articles(
        query_words), 'didnt find article in query of all words in article'
def test_query(mock_file):
    """Query a single known word and expect document id 1 only.

    The index is built from the ``mock_file`` fixture before querying.
    """
    inverted = InvertedIndex()
    inverted.build(mock_file)

    matched_ids = inverted.query(['test'])

    assert matched_ids == {1}
def test_build_index3(mock_file):
    """Build an index from input containing special characters.

    The expected mapping keeps ``'test,'`` (with the trailing comma) as a
    key separate from ``'test'``.
    """
    expected_index = {
        'test': [1],
        'test,': [1],
        'me': [1],
        'first': [1],
    }

    inverted = InvertedIndex()
    inverted.build(mock_file)

    assert inverted.index_data == expected_index
def test_dump_index(mock_file):
    """Dump a built index and check the calls recorded on the mock.

    After building from ``mock_file`` and dumping to ``'my_Test.index'``,
    every expected call (compared by its string form) must appear among
    ``mock_file.mock_calls``. Order and extra calls are not checked.
    """
    inverted = InvertedIndex()
    inverted.build(mock_file)
    inverted.dump('my_Test.index')

    expected_calls = [
        "call('my_Test.index', 'wb')",
        "call().write(b'\\x03\\x00\\x00\\x00')",
        "call().write(b'\\x00\\x02')",
        "call().write(b'me')",
        "call().write(b'\\x00\\x01')",
        "call().write(b'\\x00\\x04')",
        "call().write(b'test')",
        "call().write(b'\\x00\\x05')",
        "call().write(b'first')",
    ]

    # Compare by string form of each recorded call.
    recorded_calls = list(map(str, mock_file.mock_calls))

    for expected in expected_calls:
        assert expected in recorded_calls
def test_build_small_index():
    """Build an index from ``small_test.txt`` and compare to ``_SMALL_INDEX``.

    The file is opened relative to the current working directory.
    """
    wiki_path = 'small_test.txt'
    inverted = InvertedIndex()

    with open(wiki_path, 'r') as source_file:
        inverted.build(source_file)

    assert inverted.inverted_index == _SMALL_INDEX, f'wrong build index from {wiki_path}'