def test_release(self, dataset, local_csvs):
    """Deleting a partition file becomes visible only after ``release()``.

    The partition listing is cached on first ``load()``; removing a file
    on disk must not change subsequent loads until the cache is
    explicitly invalidated.
    """
    removed = "p2.csv"
    pds = PartitionedDataSet(str(local_csvs), dataset)

    before = pds.load()
    assert removed in before

    # Delete the partition on disk; the cached listing still hides this.
    (local_csvs / removed).unlink()
    assert before.keys() == pds.load().keys()

    # Invalidating the cache makes the deletion visible.
    pds.release()
    after = pds.load()
    assert before.keys() ^ after.keys() == {removed}
def test_release_instance_cache(self, local_csvs):
    """Test that cache invalidation does not affect other instances"""
    first = PartitionedDataSet(str(local_csvs), "pandas.CSVDataSet")
    first.load()
    second = PartitionedDataSet(str(local_csvs), "pandas.CSVDataSet")
    second.load()

    # Both instances have populated their own partition caches.
    assert first._partition_cache.currsize == 1
    assert second._partition_cache.currsize == 1

    # Invalidate only the first instance's cache.
    first.release()
    assert first._partition_cache.currsize == 0
    # The second instance keeps its cached listing untouched.
    assert second._partition_cache.currsize == 1
def test_release(self, dataset, mocked_csvs_in_s3):
    """Deleting an S3 partition becomes visible only after ``release()``.

    Mirrors the local-filesystem variant: the cached partition listing
    masks the deletion until the cache is explicitly invalidated.
    """
    removed = "p2.csv"
    pds = PartitionedDataSet(mocked_csvs_in_s3, dataset)

    before = pds.load()
    assert removed in before

    # Remove the object from S3; the cached listing still hides this.
    s3fs.S3FileSystem().rm(f"{mocked_csvs_in_s3}/{removed}")
    assert before.keys() == pds.load().keys()

    # Invalidating the cache makes the deletion visible.
    pds.release()
    after = pds.load()
    assert before.keys() ^ after.keys() == {removed}