def test_can_raise_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Check that loading with ``storage_policy=3`` raises ``AttributeError``.

    The index built from ``filepath`` is dumped with ``storage_policy=None``
    into ``index.dump`` under ``tmpdir``; the subsequent load call, given the
    integer ``3`` as its storage policy, is expected to raise.
    """
    dump_path = tmpdir.join("index.dump")
    built_index = build_inverted_index(load_documents(filepath))
    built_index.dump(dump_path, storage_policy=None)

    with pytest.raises(AttributeError):
        InvertedIndex.load(dump_path, storage_policy=3)
def test_can_dump_and_load_inverted_index_with_array_policy_parametrized(filepath, tmpdir):
    """Round-trip an inverted index through ``dump`` and ``load``.

    The index built from ``filepath`` is dumped with
    ``storage_policy=StoragePolicy``, loaded back with the same policy, and
    compared for equality with the index that was dumped.
    """
    dump_path = tmpdir.join("index.dump")
    original_index = build_inverted_index(load_documents(filepath))
    original_index.dump(dump_path, storage_policy=StoragePolicy)

    restored_index = InvertedIndex.load(dump_path, storage_policy=StoragePolicy)

    assert original_index == restored_index, "load should return the same inverted index"
def arguments(small_wikipedia_inverted_index, tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Return an argument-like object pointing at a freshly dumped index.

    An inverted index is built from the documents at ``filepath`` and dumped
    with ``storage_policy=StoragePolicy`` to ``index.dump`` under ``tmpdir``.
    The returned object exposes these attributes:

    * ``dataset`` -- the dataset path the index was built from (``filepath``);
    * ``index`` and ``output`` -- both set to the dumped index path;
    * ``query`` -- the fixed query ``[["a", "b"]]``.

    ``small_wikipedia_inverted_index`` is accepted but not used in the body;
    it is kept so the signature stays unchanged.
    NOTE(review): presumably this is registered as a pytest fixture -- the
    decorator is not visible in this view; confirm.
    """
    index_fio = tmpdir.join("index.dump")
    documents = load_documents(filepath)
    inverted_index = build_inverted_index(documents)
    inverted_index.dump(index_fio, storage_policy=StoragePolicy)

    class Arguments:
        def __init__(self):
            # Use the same path the index was built from, so ``dataset`` and
            # the dumped index can never disagree when ``filepath`` is overridden.
            self.dataset = filepath
            self.index = index_fio
            self.output = index_fio
            self.query = [["a", "b"]]

    return Arguments()
def small_wikipedia_inverted_index(small_sample_wikipedia_documents):
    """Return the inverted index built from ``small_sample_wikipedia_documents``."""
    return build_inverted_index(small_sample_wikipedia_documents)
def wikipedia_inverted_index(wikipedia_documents):
    """Return the inverted index built from ``wikipedia_documents``."""
    return build_inverted_index(wikipedia_documents)
def test_can_build_and_query_inverted_index(wikipedia_documents):
    """Build an index from ``wikipedia_documents`` and query it for one word.

    Only the type of the query result is checked: it must be a ``list``.
    """
    index = build_inverted_index(wikipedia_documents)
    found_ids = index.query(["wikipedia"])

    assert isinstance(found_ids, list), "inverted index query should return list"
def test_query_inverted_index_intersect_results(dataset_tiny_fio, query, expected_res):
    """Check that querying the tiny dataset yields ``expected_res``.

    The index is built from the documents loaded from ``dataset_tiny_fio``.
    Result order is ignored: both sides are sorted before the comparison.
    """
    tiny_index = build_inverted_index(load_documents(dataset_tiny_fio))
    actual_res = tiny_index.query(query)

    assert sorted(actual_res) == sorted(expected_res)
def test_can_load_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Smoke test: dump and load an index with ``storage_policy=None``.

    The loaded index is not inspected; the test passes as long as neither
    the dump nor the load call raises.
    """
    dump_path = tmpdir.join("index.dump")
    built_index = build_inverted_index(load_documents(filepath))
    built_index.dump(dump_path, storage_policy=None)

    InvertedIndex.load(dump_path, storage_policy=None)