Ejemplo n.º 1
0
def test_can_raise_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Check that loading with ``storage_policy=3`` raises ``AttributeError``.

    An index is built from the documents at ``filepath``, dumped into
    ``tmpdir`` with ``storage_policy=None`` and then loaded back with the
    integer ``3`` passed as the policy; the load call must raise.
    """
    dump_path = tmpdir.join("index.dump")
    inverted_index = build_inverted_index(load_documents(filepath))
    inverted_index.dump(dump_path, storage_policy=None)

    with pytest.raises(AttributeError):
        InvertedIndex.load(dump_path, storage_policy=3)
Ejemplo n.º 2
0
def test_can_dump_and_load_inverted_index_with_array_policy_parametrized(filepath, tmpdir):
    """Round-trip an inverted index through ``dump`` and ``load``.

    The index built from the documents at ``filepath`` is dumped into
    ``tmpdir`` and loaded back, both times with ``StoragePolicy``; the loaded
    index must compare equal to the one that was dumped.
    """
    dump_path = tmpdir.join("index.dump")
    original_index = build_inverted_index(load_documents(filepath))
    original_index.dump(dump_path, storage_policy=StoragePolicy)

    restored_index = InvertedIndex.load(dump_path, storage_policy=StoragePolicy)

    assert original_index == restored_index, "load should return the same inverted index"
Ejemplo n.º 3
0
def arguments(small_wikipedia_inverted_index, tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Dump an inverted index into ``tmpdir`` and return an object describing it.

    The index is built from the documents at ``filepath`` and dumped with
    ``StoragePolicy``. The returned object exposes four attributes:

    * ``dataset`` -- the path the documents were loaded from,
    * ``index``   -- the path of the dumped index,
    * ``output``  -- the same dump path as ``index``,
    * ``query``   -- the fixed query ``[["a", "b"]]``.

    NOTE(review): the attribute names suggest this stands in for parsed
    command-line arguments -- confirm against the code that consumes it.

    ``small_wikipedia_inverted_index`` is not used in the body; it is kept so
    the signature (and whatever it pulls in) stays unchanged.
    """
    index_fio = tmpdir.join("index.dump")
    inverted_index = build_inverted_index(load_documents(filepath))
    inverted_index.dump(index_fio, storage_policy=StoragePolicy)

    class Arguments:
        """Plain attribute holder for the values described above."""

        def __init__(self):
            # Report the path the index was actually built from, rather than
            # the module constant, so ``dataset`` and the dumped index cannot
            # disagree when ``filepath`` is overridden.
            self.dataset = filepath
            self.index = index_fio
            self.output = index_fio
            self.query = [["a", "b"]]

    return Arguments()
Ejemplo n.º 4
0
def small_wikipedia_inverted_index(small_sample_wikipedia_documents):
    """Return the inverted index built from ``small_sample_wikipedia_documents``."""
    return build_inverted_index(small_sample_wikipedia_documents)
Ejemplo n.º 5
0
def wikipedia_inverted_index(wikipedia_documents):
    """Return the inverted index built from ``wikipedia_documents``."""
    return build_inverted_index(wikipedia_documents)
Ejemplo n.º 6
0
def test_can_build_and_query_inverted_index(wikipedia_documents):
    """Build an index from ``wikipedia_documents`` and check ``query`` returns a list.

    Only the type of the result for the single-word query ``["wikipedia"]``
    is checked, not its contents.
    """
    index = build_inverted_index(wikipedia_documents)
    query_result = index.query(["wikipedia"])

    assert isinstance(query_result, list), "inverted index query should return list"
Ejemplo n.º 7
0
def test_query_inverted_index_intersect_results(dataset_tiny_fio, query, expected_res):
    """Check that querying the tiny dataset yields ``expected_res``.

    The index is built from the documents loaded from ``dataset_tiny_fio``.
    Both sides are sorted before comparison, so the order of the returned
    items does not matter.
    """
    tiny_index = build_inverted_index(load_documents(dataset_tiny_fio))
    actual_res = tiny_index.query(query)

    actual_sorted = sorted(actual_res)
    expected_sorted = sorted(expected_res)
    assert actual_sorted == expected_sorted
Ejemplo n.º 8
0
def test_can_load_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Smoke-test ``dump`` and ``load`` with ``storage_policy=None``.

    The index built from the documents at ``filepath`` is dumped into
    ``tmpdir`` and loaded back. Nothing is asserted about the loaded index;
    the test passes as long as neither call raises.
    """
    dump_path = tmpdir.join("index.dump")
    inverted_index = build_inverted_index(load_documents(filepath))
    inverted_index.dump(dump_path, storage_policy=None)

    InvertedIndex.load(dump_path, storage_policy=None)