Exemple #1
0
    def test_transform(self):
        # The transform is applied to the loaded data, and its hash is part of the cache key
        store = {}
        csv_path = os.path.join(cache_folder, 'data.csv')

        # Built-in callable: the result is whatever len() gives for the loaded frame
        row_count = gramex.cache.open(csv_path, 'csv', transform=len, _cache=store)
        eq_(row_count, len(pd.read_csv(csv_path)))          # noqa - ignore encoding
        self.assertIn((csv_path, 'csv', hashfn(len), frozenset([])), store)

        # Locally defined callable: stored under a key built from its own hash
        def column_total(frame):
            return frame['a'].sum()

        total = gramex.cache.open(csv_path, 'csv', transform=column_total, _cache=store)
        eq_(total, pd.read_csv(csv_path)['a'].sum())        # noqa - ignore encoding
        self.assertIn((csv_path, 'csv', hashfn(column_total), frozenset([])), store)

        # Non-callable transform: the data comes back untransformed, yet the
        # value still contributes to the cache key
        untouched = gramex.cache.open(csv_path, 'csv', transform='ignore', _cache=store)
        assert_frame_equal(untouched, pd.read_csv(csv_path))    # noqa - ignore encoding
        self.assertIn((csv_path, 'csv', hashfn('ignore'), frozenset([])), store)

        # Two throwaway lambdas must not share a cache entry: each call has to
        # return the value its own lambda sees at call time
        marker = 1
        first = gramex.cache.open(csv_path, 'csv', lambda _frame: marker, _cache=store)
        eq_(first, 1)
        marker = 2
        second = gramex.cache.open(csv_path, 'csv', lambda _frame: marker, _cache=store)
        eq_(second, 2)
Exemple #2
0
    def test_custom_cache(self):
        # gramex.cache.open() stores entries in a caller-supplied _cache dict and
        # reports via _reload_status whether the file was (re)loaded
        csv_path = os.path.join(cache_folder, 'data.csv')
        store = {}
        options = dict(_reload_status=True, _cache=store)

        def was_reloaded(*args, **extra):
            # Open the CSV with the shared options appended after any extra
            # keyword arguments, and hand back only the reload flag
            extra.update(options)
            _, reloaded = gramex.cache.open(csv_path, *args, **extra)
            return reloaded

        # The first request loads the file and stores it under the plain key
        first_load = was_reloaded('csv')
        plain_key = (csv_path, 'csv', hashfn(None), frozenset())
        self.assertIn(plain_key, store)
        eq_(first_load, True)

        # A repeat request is served from the cache
        eq_(was_reloaded('csv'), False)

        # Removing the entry forces a fresh load
        del store[plain_key]
        eq_(was_reloaded('csv'), True)

        # Extra keyword arguments become part of the cache key
        utf8_load = was_reloaded(encoding='utf-8')
        utf8_key = (csv_path, None, hashfn(None), frozenset([('encoding', 'utf-8')]))
        self.assertIn(utf8_key, store)
        eq_(utf8_load, True)
        eq_(was_reloaded(encoding='utf-8'), False)

        # A different keyword value is a different entry, so the data reloads once
        eq_(was_reloaded(encoding='cp1252'), True)
        eq_(was_reloaded(encoding='cp1252'), False)

        # Non-hashable keyword values must not break cache key construction
        was_reloaded(
            header=0,
            parse_dates={'date': [0, 1, 2]},
            dtype={'a': int, 'b': float, 'c': int},
        )
        expected_kwargs = frozenset([
            ('header', 0),                          # hashable values are used as-is
            ('parse_dates', '{"date":[0,1,2]}'),    # compact JSON when serialisable
            ('dtype', None),                        # falls back to None otherwise
        ])
        self.assertIn((csv_path, None, hashfn(None), expected_kwargs), store)
Exemple #3
0
    def test_change_cache(self):
        # gramex.cache.open_cache() replaces the default cache used by open()
        csv_path = os.path.join(cache_folder, 'data.csv')
        previous = gramex.cache._OPEN_CACHE
        replacement = {}
        key = (csv_path, 'csv', hashfn(None), frozenset())

        # Prime the default cache, then snapshot its contents
        gramex.cache.open(csv_path, 'csv')
        self.assertIn(key, previous)
        snapshot = dict(previous)

        # Switching caches moves the entries across and leaves the old one empty
        gramex.cache.open_cache(replacement)
        eq_(replacement, snapshot)
        eq_(previous, {})

        # The migrated entry is served from the replacement without reloading
        _, reloaded = gramex.cache.open(csv_path, 'csv', _reload_status=True)
        eq_(reloaded, False)
        self.assertIn(key, replacement)

        # Once evicted, the next request reloads into the replacement only
        del replacement[key]
        previous.pop(key, None)
        self.assertNotIn(key, replacement)
        _, reloaded = gramex.cache.open(csv_path, 'csv', _reload_status=True)
        eq_(reloaded, True)
        self.assertIn(key, replacement)
        self.assertNotIn(key, previous)