def test_export_import_cache_with_queries_list(self, tmp_path, query, metadata, results):
    """Exporting a subset of cached queries imports only that subset.

    Dumps four queries into one store, exports just the first two, and
    verifies the importing store ends up with exactly those two entries.
    """
    queries = [
        "select top 10 * from Receipts",
        "select top 20 * from Receipts",
        "select top 5 * from Receipts",
        "select top 25 * from Receipts",
    ]
    cache_store1 = tmp_path / "cache1"
    cache_store2 = tmp_path / "cache2"
    cache_export_file = tmp_path / "cache.zip"
    store1 = store.FileStore(cache_store=cache_store1)
    store2 = store.FileStore(cache_store=cache_store2)
    # Loop variable renamed from `query`, which shadowed the fixture argument.
    for q in queries:
        store1.dump(q, results, metadata)
    # Export only the first two queries, then import them into the second store.
    store1.export(cache_export_file, queries=queries[:2])
    store2.import_cache(cache_export_file)
    store1_cache = store1.list()
    store2_cache = store2.list()
    assert store1_cache.shape[0] == 4
    assert store2_cache.shape[0] == 2
    assert set(store2_cache.loc[:, "query"]) == set(queries[:2])
def test_export_import_cache(self, tmp_path, query, metadata, results):
    """A full export/import round trip preserves the cache listing."""
    source_dir = tmp_path / "cache1"
    target_dir = tmp_path / "cache2"
    archive = tmp_path / "cache.zip"
    source = store.FileStore(cache_store=source_dir)
    target = store.FileStore(cache_store=target_dir)
    # Populate the source, move it through an archive, and compare listings.
    source.dump(query, results, metadata)
    source.export(archive)
    target.import_cache(archive)
    assert source.list().equals(target.list())
def test_get_filepaths_joblib(self, tmp_path, query):
    """Metadata/cache paths follow the <hash>.json / <hash>.joblib convention."""
    s = store.FileStore(cache_store=tmp_path, backend="joblib")
    digest = store.hash_query(query)
    meta_path = s.get_metadata_filepath(query)
    data_path = s.get_cache_filepath(query)
    # Both file stems are the query hash.
    assert meta_path.stem == digest
    assert data_path.stem == digest
    # Files live under a per-format subdirectory of the cache root.
    base = tmp_path / s.serializer.fmt
    assert meta_path == base / (digest + ".json")
    assert data_path == base / (digest + ".joblib")
def test_cache_independent_from_format(self, tmp_path, metadata, results):
    """With normalize=True, case-equivalent queries share one cache entry."""
    query1 = "select top 3 * from receipts"
    query2 = "SELECT top 3 * FROM receipts"
    # Sanity check: the two spellings normalize to the same query.
    assert utils.normalize_query(query1) == utils.normalize_query(query2)
    parquet_store = store.FileStore(cache_store=tmp_path, normalize=True)
    # Nothing is cached yet for either spelling.
    for candidate in (query1, query2):
        assert not parquet_store.get_metadata_filepath(candidate).exists()
        assert not parquet_store.get_cache_filepath(candidate).exists()
        assert not parquet_store.exists(candidate)
    # Dumping one spelling makes BOTH spellings hit the same cache entry.
    parquet_store.dump(query1, results, metadata)
    for candidate in (query1, query2):
        assert parquet_store.get_metadata_filepath(candidate).exists()
        assert parquet_store.get_cache_filepath(candidate).exists()
        assert parquet_store.exists(candidate)
def test_init(self, tmp_path):
    """Constructing a FileStore creates its cache directory on disk."""
    file_store = store.FileStore(cache_store=tmp_path)
    assert file_store.cache_store.exists()
def test_init_joblib(self, tmp_path):
    """The joblib backend installs a JoblibSerializer with compression 0."""
    s = store.FileStore(cache_store=tmp_path, backend="joblib")
    assert isinstance(s.serializer, serializer.JoblibSerializer)
    # Was a bare comparison (no `assert`) — a no-op that verified nothing.
    assert s.serializer.compression == 0
    assert s.cache_store.exists()
def test_init_parquet(self, tmp_path):
    """The parquet backend installs a ParquetSerializer with snappy compression."""
    s = store.FileStore(cache_store=tmp_path, backend="parquet")
    assert isinstance(s.serializer, serializer.ParquetSerializer)
    # Was a bare comparison (no `assert`) — a no-op that verified nothing.
    assert s.serializer.compression == "snappy"
    assert s.cache_store.exists()
def file_store(tmp_path):
    """Return a normalizing FileStore rooted at the pytest tmp_path."""
    normalizing_store = store.FileStore(cache_store=tmp_path, normalize=True)
    return normalizing_store