예제 #1
0
def test_build_index2(mock_file):
    """Build an index from a string with a repeated word and check its contents.

    The input comes from the ``mock_file`` fixture; each of the three
    expected words must map to ``[1]`` exactly once.
    """
    expected = {'test': [1], 'me': [1], 'first': [1]}

    idx = InvertedIndex()
    idx.build(mock_file)

    assert idx.index_data == expected
def test_many_queries_not_in_one_article(query):
    """Ensure article '12' is absent from the search results for ``query``.

    The index is built from the file at ``ONE_ARTICLE_PATH`` before the
    search is performed.
    """
    inverted = InvertedIndex()
    with open(ONE_ARTICLE_PATH, 'r') as source:
        inverted.build(source)

    excluded_id = '12'
    found = inverted.find_articles(query)
    assert excluded_id not in found, 'find article in query that not in one article'
예제 #3
0
def test_build_index(mock_file):
    """Build an index from a simple string and check its contents.

    The input comes from the ``mock_file`` fixture; every expected word
    must map to ``[1]``.
    """
    built = InvertedIndex()
    built.build(mock_file)

    expected_words = ('test', 'me', 'first')
    assert built.index_data == {word: [1] for word in expected_words}
예제 #4
0
def test_query2(mock_file):
    """Query an index for words that occur repeatedly in the input.

    The index is built from the ``mock_file`` fixture; querying for both
    words must return exactly the ids 1, 10 and 100500.
    """
    words = ['string', 'long']
    expected_ids = {1, 10, 100500}

    idx = InvertedIndex()
    idx.build(mock_file)

    assert idx.query(words) == expected_ids
예제 #5
0
파일: search.py 프로젝트: artxc/PyDocSearch
def main():
    """Index every entry of the corpus directory and start the query loop.

    Side effect: the process working directory is changed to
    ``RELATIVE_PATH_TO_CORPUS``, so the names handed to ``InvertedIndex``
    are bare directory entries relative to that location.
    """
    os.chdir(RELATIVE_PATH_TO_CORPUS)
    corpus_dir = os.getcwd()

    inverted = InvertedIndex(os.listdir(corpus_dir))
    inverted.build()

    handler = QueryHandler(inverted)
    handler.loop()
예제 #6
0
def test_build_index4(mock_file):
    """Build an index from multi-line input and check how many entries it has.

    The input comes from the ``mock_file`` fixture.
    """
    expected_entries = 12

    idx = InvertedIndex()
    idx.build(mock_file)

    # The index and its ``values()`` collection must both hold 12 entries.
    assert len(idx.index_data) == expected_entries
    assert len(idx.index_data.values()) == expected_entries
def test_one_article():
    """Search for all words of the single article and expect that article back.

    The index is built from the file at ``ONE_ARTICLE_PATH``; the same file
    is then re-read to obtain the article id and the words to search for.
    """
    inverted = InvertedIndex()
    with open(ONE_ARTICLE_PATH, 'r') as source:
        inverted.build(source)

    # First line layout: the leading token is the article id, the remainder
    # is the text whose words make up the query.
    with open(ONE_ARTICLE_PATH, 'r') as source:
        first_line = source.readline()
    article_id, text = first_line.split(maxsplit=1)
    query_words = text.split()

    # NOTE(review): the id is compared for equality with the whole result of
    # find_articles(), while test_many_queries_not_in_one_article uses
    # `not in` on it -- confirm the return type makes `==` the intended check.
    result = inverted.find_articles(query_words)
    assert article_id == result, 'didnt find article in query of all words in article'
예제 #8
0
def test_query(mock_file):
    """Query an index for a single word that is present.

    The index is built from the ``mock_file`` fixture; looking up 'test'
    must return exactly the id 1.
    """
    idx = InvertedIndex()
    idx.build(mock_file)

    result = idx.query(['test'])
    assert result == {1}
예제 #9
0
def test_build_index3(mock_file):
    """Build an index from a string that contains special characters.

    The input comes from the ``mock_file`` fixture.
    """
    # 'test,' is expected as its own key next to 'test': the trailing comma
    # stays part of the indexed token.
    expected = {
        'test': [1],
        'test,': [1],
        'me': [1],
        'first': [1],
    }

    idx = InvertedIndex()
    idx.build(mock_file)

    assert idx.index_data == expected
예제 #10
0
def test_dump_index(mock_file):
    """Dump a built index and verify the recorded open/write calls.

    Calls recorded on ``mock_file`` are compared through their string
    representation. Only the presence of each expected call is checked,
    not the order of the calls or the absence of additional ones.
    """
    idx = InvertedIndex()
    idx.build(mock_file)
    idx.dump('my_Test.index')

    recorded = list(map(str, mock_file.mock_calls))

    # NOTE(review): the payloads look like a 4-byte entry count followed by
    # length-prefixed words -- confirm against the dump() implementation.
    expected_calls = (
        "call('my_Test.index', 'wb')",
        "call().write(b'\\x03\\x00\\x00\\x00')",
        "call().write(b'\\x00\\x02')",
        "call().write(b'me')",
        "call().write(b'\\x00\\x01')",
        "call().write(b'\\x00\\x04')",
        "call().write(b'test')",
        "call().write(b'\\x00\\x05')",
        "call().write(b'first')",
    )

    for expected in expected_calls:
        assert expected in recorded
def test_build_small_index():
    """Build an index from 'small_test.txt' and compare it with ``_SMALL_INDEX``."""
    wiki_path = 'small_test.txt'

    inverted = InvertedIndex()
    with open(wiki_path, 'r') as source:
        inverted.build(source)

    built = inverted.inverted_index
    assert built == _SMALL_INDEX, f'wrong build index from {wiki_path}'