Beispiel #1
0
def test_query_from_loaded2(mock_file):
    """Query a loaded index with a word list that includes a missing word.

    The query is expected to return an empty set, and the ``mock_file``
    fixture must have recorded more than one call.
    """
    loaded_index = InvertedIndex()
    loaded_index.load('test.index')

    query_words = ['me', 'test', 'non existed']
    found_documents = loaded_index.query(query_words)
    assert found_documents == set()

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
Beispiel #2
0
def test_load_index(mock_file):
    """Load an index from a file and compare its data with the expected mapping.

    The ``mock_file`` fixture must also have recorded more than one call.
    """
    expected_data = {'me': [1], 'test': [1, 2], 'you': [2]}

    loaded_index = InvertedIndex()
    loaded_index.load('my_Test.index')

    assert loaded_index.index_data == expected_data
    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_dump_load_small_index():
    """Dump a small index to a file and check that loading restores its data.

    The data is loaded into a *fresh* ``InvertedIndex`` instance. Loading back
    into the instance that produced the dump would let the final assertion
    pass even if ``load`` did nothing, because that instance's
    ``inverted_index`` attribute already holds ``_SMALL_INDEX``.
    """
    small_index_path = 'small.index'

    source_index = InvertedIndex()
    source_index.inverted_index = _SMALL_INDEX
    with open(small_index_path, 'wb') as fd:
        source_index.dump(fd)

    # NOTE(review): like the previous version of this test, this assumes
    # ``load`` fills the instance it is called on -- confirm against the class.
    loaded_index = InvertedIndex()
    with open(small_index_path, 'rb') as fd:
        loaded_index.load(fd)

    assert loaded_index.inverted_index == _SMALL_INDEX, 'dumped and loaded index not the same'
Beispiel #4
0
def test_query_from_loaded(mock_file):
    """Load an index, query it for two words, and expect the set ``{1}``.

    The ``mock_file`` fixture must also have recorded more than one call.
    """
    loaded_index = InvertedIndex()
    loaded_index.load('test.index')

    matching_documents = loaded_index.query(['me', 'test'])
    assert matching_documents == {1}

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
Beispiel #5
0
def test_check_compression_good():
    """Compare the JSON index with the zipped-JSON index.

    Both indexes are loaded from their files with the matching storage
    policy. They must give equal answers to the same query and report the
    same size.
    """
    plain_index = InvertedIndex.load(
        "inverted_index/inverted_json.index", JsonStoragePolicy())
    zipped_index = InvertedIndex.load(
        "inverted_index/inverted_json_zip.index", JsonZipStoragePolicy())

    # Each query gets its own word list, as in the original call sites.
    plain_answer = plain_index.query(["two", "words"])
    zipped_answer = zipped_index.query(["two", "words"])
    assert plain_answer == zipped_answer, "compressin give another answer"

    plain_size = plain_index.get_size()
    zipped_size = zipped_index.get_size()
    assert plain_size == zipped_size, "compressed file has diff num of records"
def test_init_empty_inverted_index_do_not_raise_exception():
    """Dump and load an empty index with each storage policy without errors.

    The test passes when building an index from an empty mapping, dumping it
    to ``INDEX_TMP_PATH`` and loading it back raises nothing, first with the
    JSON storage policy and then with the pickle storage policy.
    """
    word_to_docs_mapping = {}
    inverted_index = InvertedIndex(word_to_docs_mapping)

    # Each policy is instantiated right before its own dump/load round trip.
    for policy_class in (JsonStoragePolicy, PickleStoragePolicy):
        storage_policy = policy_class()
        inverted_index.dump(INDEX_TMP_PATH, storage_policy=storage_policy)
        inverted_index.load(INDEX_TMP_PATH, storage_policy=storage_policy)

    # NOTE(review): the earlier version asserted
    # ``word_to_docs_mapping == word_to_docs_mapping``, which is always true
    # and checked nothing, so it was removed. To also verify the loaded
    # content, compare the value returned by ``load`` with the expected
    # mapping through whatever equality/attribute API the class offers.
def test_dump_and_load_index(tmp_path, tiny_sample_document):
    """Build an index from the tiny sample, dump it, and load it back.

    The built table must equal ``TINY_SAMPLE_INV_TABLE``, and the table
    loaded from the dump file must equal the built one.
    """
    target_dir = tmp_path / "tiny_example_dir"
    target_dir.mkdir()
    index_path = target_dir / "tiny_example.index"

    built_table = build_inverted_index(tiny_sample_document)
    built_table.dump(index_path)
    assert built_table == TINY_SAMPLE_INV_TABLE

    restored_table = InvertedIndex.load(index_path)
    assert built_table == restored_table
def test_can_dump_and_load_inverted_index(tmpdir, small_dataset_index):
    """Dump the small dataset index and load it back from the same file.

    The loaded index must compare equal to the dumped one and must not
    compare equal to an empty dict.
    """
    index_fio = tmpdir.join('inverted.index')
    small_dataset_index.dump(index_fio)
    loaded_inverted_index = InvertedIndex.load(index_fio)
    assert small_dataset_index == loaded_inverted_index, (
        "load should return the same inverted index"
    )
    # This check is about emptiness, so it gets its own failure message
    # instead of repeating the one from the equality check above.
    assert {} != loaded_inverted_index, (
        "load should not return an empty inverted index"
    )
def test_binary_dump_and_load_index(tmp_path,
                                    tiny_sample_document,
                                    words=None):
    """Round-trip an index through the binary storage policy and query it.

    Args:
        tmp_path: directory fixture under which the index file is created.
        tiny_sample_document: fixture passed to ``build_inverted_index``.
        words: words handed to ``query_callback``; defaults to
            ``['of', 'words']`` when omitted.
    """
    # Build the default inside the function: a list literal in the signature
    # would be one shared object reused (and mutable) across calls.
    if words is None:
        words = ['of', 'words']

    dir_ = tmp_path / "tiny_example_dir"
    dir_.mkdir()
    index_file = dir_ / "tiny_example.bin.index"

    inv_table = build_inverted_index(tiny_sample_document)
    inv_table.dump(index_file, storage_policy='binary')
    assert inv_table == TINY_SAMPLE_INV_TABLE

    loaded_inv_table = InvertedIndex.load(index_file, storage_policy='binary')
    assert inv_table == loaded_inv_table

    # Exercise the query callback with an object that exposes the
    # ``index_path`` and ``words`` attributes.
    Args = namedtuple('Args', ['index_path', 'words'])
    args = Args(index_path=index_file, words=words)
    response = query_callback(args)
    expected_response = [
        {14, 1000},
    ]
    assert response == expected_response
Beispiel #10
0
def load_inverted_index():
    """Return the result of ``InvertedIndex.load`` for ``inv_index.dat``."""
    index_path = 'inv_index.dat'
    return InvertedIndex.load(index_path)
Beispiel #11
0
def test_can_dump_and_load_inverted_index(tmpdir, wikipedia_inverted_index):
    """Dump the Wikipedia index fixture and load it back from the same file.

    The loaded index must compare equal to the one that was dumped.
    """
    dump_path = tmpdir.join("index.dump")
    wikipedia_inverted_index.dump(dump_path)

    restored_index = InvertedIndex.load(dump_path)
    failure_message = "load should return the same inverted index"
    assert wikipedia_inverted_index == restored_index, failure_message