def test_transform(self):
    """Verify that ``transform`` is applied to loaded data and forms part of the cache key."""
    store = {}
    path = os.path.join(cache_folder, 'data.csv')

    # A callable transform (len) is applied to the loaded DataFrame...
    result = gramex.cache.open(path, 'csv', transform=len, _cache=store)
    eq_(result, len(pd.read_csv(path)))  # noqa - ignore encoding
    # ...and its hash becomes part of the cache key
    self.assertIn((path, 'csv', hashfn(len), frozenset([])), store)

    def column_sum(frame):
        return frame['a'].sum()

    result = gramex.cache.open(path, 'csv', transform=column_sum, _cache=store)
    eq_(result, pd.read_csv(path)['a'].sum())  # noqa - ignore encoding
    self.assertIn((path, 'csv', hashfn(column_sum), frozenset([])), store)

    # A non-callable transform is ignored when loading, but still keys the cache
    result = gramex.cache.open(path, 'csv', transform='ignore', _cache=store)
    assert_frame_equal(result, pd.read_csv(path))  # noqa - ignore encoding
    self.assertIn((path, 'csv', hashfn('ignore'), frozenset([])), store)

    # Each lambda is a distinct object, so temporary caches are hashed per-function:
    # the second call is NOT served from the first lambda's cache entry.
    captured = 1
    result = gramex.cache.open(path, 'csv', lambda x: captured, _cache=store)
    eq_(result, 1)
    captured = 2
    result = gramex.cache.open(path, 'csv', lambda x: captured, _cache=store)
    eq_(result, 2)
def test_custom_cache(self):
    """A caller-supplied ``_cache`` dict is used, keyed by (path, callback, hash, kwargs)."""
    path = os.path.join(cache_folder, 'data.csv')
    store = {}
    opts = {'_reload_status': True, '_cache': store}

    data, was_loaded = gramex.cache.open(path, 'csv', **opts)
    key = (path, 'csv', hashfn(None), frozenset())
    self.assertIn(key, store)
    # The first access loads from disk
    eq_(was_loaded, True)
    # The second access is served from the cache
    data, was_loaded = gramex.cache.open(path, 'csv', **opts)
    eq_(was_loaded, False)
    # Evicting the entry forces a reload
    del store[key]
    data, was_loaded = gramex.cache.open(path, 'csv', **opts)
    eq_(was_loaded, True)

    # Extra keyword arguments become part of the cache key
    data, was_loaded = gramex.cache.open(path, encoding='utf-8', **opts)
    key = (path, None, hashfn(None), frozenset([('encoding', 'utf-8')]))
    self.assertIn(key, store)
    eq_(was_loaded, True)
    data, was_loaded = gramex.cache.open(path, encoding='utf-8', **opts)
    eq_(was_loaded, False)
    # Different kwargs mean a different key, hence a reload
    data, was_loaded = gramex.cache.open(path, encoding='cp1252', **opts)
    eq_(was_loaded, True)
    data, was_loaded = gramex.cache.open(path, encoding='cp1252', **opts)
    eq_(was_loaded, False)

    # Non-hashable kwarg values do not break the cache
    data, was_loaded = gramex.cache.open(
        path, header=0, parse_dates={'date': [0, 1, 2]},
        dtype={'a': int, 'b': float, 'c': int}, **opts)
    key = (
        path, None, hashfn(None),
        frozenset([
            ('header', 0),                        # hashable values hashed as-is
            ('parse_dates', '{"date":[0,1,2]}'),  # converted to compact JSON if possible
            ('dtype', None),                      # gives up with None otherwise
        ]))
    self.assertIn(key, store)
def test_change_cache(self):
    """``gramex.cache.open_cache()`` swaps the default cache used by gramex.cache.open().

    Swapping copies existing entries into the new cache, empties the old one,
    and routes all subsequent loads through the new cache.
    """
    path = os.path.join(cache_folder, 'data.csv')
    new_cache = {}
    old_cache = gramex.cache._OPEN_CACHE
    cache_key = (path, 'csv', hashfn(None), frozenset())
    # Ensure that the path is cached in the current default cache
    gramex.cache.open(path, 'csv')
    self.assertIn(cache_key, old_cache)
    old_cache_data = dict(old_cache)
    try:
        # Updating the cache copies data into the new cache and empties the old one
        gramex.cache.open_cache(new_cache)
        eq_(new_cache, old_cache_data)
        eq_(old_cache, {})
        # New requests are served from (and stored in) the new cache
        result, reloaded = gramex.cache.open(path, 'csv', _reload_status=True)
        eq_(reloaded, False)
        self.assertIn(cache_key, new_cache)
        del new_cache[cache_key]
        old_cache.pop(cache_key, None)
        self.assertNotIn(cache_key, new_cache)
        result, reloaded = gramex.cache.open(path, 'csv', _reload_status=True)
        eq_(reloaded, True)
        self.assertIn(cache_key, new_cache)
        self.assertNotIn(cache_key, old_cache)
    finally:
        # FIX: restore the original default cache. The original test left
        # gramex.cache._OPEN_CACHE pointing at this test's local dict,
        # leaking state into every later test that uses the default cache.
        gramex.cache.open_cache(old_cache)