Example 1
    def test_release(self, dataset, local_csvs):
        partition_to_remove = "p2.csv"
        pds = PartitionedDataSet(str(local_csvs), dataset)
        initial_load = pds.load()
        assert partition_to_remove in initial_load

        (local_csvs / partition_to_remove).unlink()
        # the deleted partition is still served from the cached listing
        cached_load = pds.load()
        assert initial_load.keys() == cached_load.keys()

        pds.release()
        load_after_release = pds.load()
        # symmetric difference: only the removed partition should differ
        assert initial_load.keys() ^ load_after_release.keys() == {
            partition_to_remove
        }
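The `dataset` and `local_csvs` fixtures are defined elsewhere in the test suite and are not shown above. A minimal sketch of what they might look like, assuming `PartitionedDataSet` is imported from `kedro.io` in the test module and that the directory holds CSV partitions named p1.csv, p2.csv and p3.csv (these names match the partition deleted in the test; the data values are placeholders):

import pandas as pd
import pytest


@pytest.fixture
def dataset():
    # Dataset type used for every partition.
    return "pandas.CSVDataSet"


@pytest.fixture
def local_csvs(tmp_path):
    # Create a directory of CSV partitions under pytest's tmp_path.
    partitions_dir = tmp_path / "csvs"
    partitions_dir.mkdir()
    df = pd.DataFrame({"col": [1, 2, 3]})
    for name in ("p1.csv", "p2.csv", "p3.csv"):
        df.to_csv(partitions_dir / name, index=False)
    return partitions_dir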
Example 2
    def test_release_instance_cache(self, local_csvs):
        """Test that cache invalidation does not affect other instances"""
        ds_a = PartitionedDataSet(str(local_csvs), "pandas.CSVDataSet")
        ds_a.load()
        ds_b = PartitionedDataSet(str(local_csvs), "pandas.CSVDataSet")
        ds_b.load()

        assert ds_a._partition_cache.currsize == 1
        assert ds_b._partition_cache.currsize == 1

        # invalidate cache of the dataset A
        ds_a.release()
        assert ds_a._partition_cache.currsize == 0
        # cache of the dataset B is unaffected
        assert ds_b._partition_cache.currsize == 1
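The assertions above read `_partition_cache.currsize`, which implies a per-instance cache of the partition listing. A rough sketch of that pattern using `cachetools`, only to illustrate why releasing one instance leaves the other untouched; the class name and listing logic are hypothetical, not Kedro's actual implementation:

from cachetools import Cache


class CachedListing:
    """Hypothetical class showing a per-instance partition cache."""

    def __init__(self):
        # Each instance owns its own cache, so clearing one
        # does not affect any other instance.
        self._partition_cache = Cache(maxsize=1)

    def load(self):
        if "partitions" not in self._partition_cache:
            self._partition_cache["partitions"] = self._list_partitions()
        return self._partition_cache["partitions"]

    def release(self):
        # Invalidate only this instance's cached listing.
        self._partition_cache.clear()

    def _list_partitions(self):
        return ["p1.csv", "p2.csv", "p3.csv"]  # placeholder listing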
Example 3
    def test_release(self, dataset, mocked_csvs_in_s3):
        partition_to_remove = "p2.csv"
        pds = PartitionedDataSet(mocked_csvs_in_s3, dataset)
        initial_load = pds.load()
        assert partition_to_remove in initial_load

        s3 = s3fs.S3FileSystem()
        s3.rm("/".join([mocked_csvs_in_s3, partition_to_remove]))
        cached_load = pds.load()
        assert initial_load.keys() == cached_load.keys()

        pds.release()
        load_after_release = pds.load()
        assert initial_load.keys() ^ load_after_release.keys() == {
            partition_to_remove
        }
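The `mocked_csvs_in_s3` fixture is likewise provided elsewhere in the suite. A sketch of how such a fixture could be built with `moto` and `boto3`; the bucket name, key prefix and file contents are assumptions, and whether `s3fs` in the test talks to the mocked backend depends on how credentials and endpoints are configured in the wider suite:

import boto3
import pandas as pd
import pytest
from moto import mock_s3  # moto < 5.0; newer releases expose mock_aws instead

BUCKET = "test-bucket"  # hypothetical bucket name
PREFIX = "csvs"         # hypothetical key prefix


@pytest.fixture
def mocked_csvs_in_s3():
    # Start an in-memory S3, upload a few CSV partitions,
    # and hand the s3:// path to the test.
    with mock_s3():
        client = boto3.client("s3", region_name="us-east-1")
        client.create_bucket(Bucket=BUCKET)
        body = pd.DataFrame({"col": [1, 2, 3]}).to_csv(index=False)
        for name in ("p1.csv", "p2.csv", "p3.csv"):
            client.put_object(
                Bucket=BUCKET,
                Key=f"{PREFIX}/{name}",
                Body=body.encode("utf-8"),
            )
        yield f"s3://{BUCKET}/{PREFIX}"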