class MemcachedNonceCache(object):
    """Object for managing a cache of used nonce values in memcached.

    This class allows easy timestamp-based management of client-generated
    nonces for hawkauthlib.  It stores the nonces in memcached so that they
    can be shared between different webserver processes.

    Each timestamp+nonce combo is stored under a key sha1(<timestamp>:<nonce>).
    """

    def __init__(self, window=None, get_time=None, cache_server=None,
                 cache_key_prefix="noncecache:", cache_pool_size=None,
                 cache_pool_timeout=60, **kwds):
        # Memcached ttls are in integer seconds, so round up to the nearest.
        if window is None:
            window = DEFAULT_TIMESTAMP_WINDOW
        else:
            window = int(math.ceil(window))
        self.window = window
        self.get_time = get_time or time.time
        self.mcclient = MemcachedClient(cache_server, cache_key_prefix,
                                        cache_pool_size, cache_pool_timeout)

    def __len__(self):
        raise NotImplementedError

    def check_nonce(self, timestamp, nonce):
        """Check if the given timestamp+nonce is fresh.

        This method checks that the given timestamp is within the configured
        time window, and that the given nonce has not previously been seen
        with that timestamp.  It returns True if the nonce is fresh and False
        if it is stale.

        Fresh nonces are stored in memcache so that subsequent checks of the
        same nonce will return False.
        """
        now = self.get_time()
        # Check if the timestamp is within the configured window.
        ts_min = now - self.window
        ts_max = now + self.window
        if not ts_min < timestamp < ts_max:
            return False
        # Check if it's in memcached, adding it if not.
        # Fortunately memcached 'add' has precisely the right semantics
        # of "create if not exists".
        key = urlsafe_b64encode(sha1("%d:%s" % (timestamp, nonce)).digest())
        try:
            if not self.mcclient.add(key, 1, time=self.window):
                return False
        except ValueError:
            return False
        # Successfully added, the nonce must be fresh.
        return True
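
# Example usage sketch for the window-based nonce cache above.  The window
# value, nonce strings and timestamps here are illustrative assumptions, not
# values taken from the surrounding code.
def _example_nonce_check(cache_server=None):
    # Accept nonces whose timestamp is within 60 seconds of the local clock.
    cache = MemcachedNonceCache(window=60, cache_server=cache_server)
    timestamp = int(time.time())
    # First sighting of this timestamp+nonce combo: fresh, so it gets cached.
    assert cache.check_nonce(timestamp, "abc123")
    # Replaying the same combo fails, because the memcached 'add' fails.
    assert not cache.check_nonce(timestamp, "abc123")
    # A timestamp outside the window is rejected without touching memcached.
    assert not cache.check_nonce(timestamp - 3600, "def456")
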

class MemcachedStorage(SyncStorage):
    """Memcached caching wrapper for SyncStorage backends.

    The SyncStorage implementation wraps another storage backend to provide
    a caching layer.  You may specify the following arguments:

        * storage: the underlying SyncStorage object that is to be wrapped.
        * cache_servers: a list of memcached server URLs.
        * cached_collections: a list of names of collections that should
          be duplicated into memcache for fast access.
        * cache_only_collections: a list of names of collections that should
          be stored *only* in memcached, and never written through to the
          backend.
        * cache_key_prefix: a string to be prepended to all memcached keys,
          useful for namespacing in shared cache setups.
        * cache_pool_size: the maximum number of active memcache clients.
        * cache_pool_timeout: the maximum lifetime of each memcache client.

    """

    def __init__(self, storage, cache_servers=None, cache_key_prefix="",
                 cache_pool_size=None, cache_pool_timeout=60,
                 cached_collections=(), cache_only_collections=(),
                 cache_lock=False, cache_lock_ttl=None, **kwds):
        self.storage = storage
        self.cache = MemcachedClient(cache_servers, cache_key_prefix,
                                     cache_pool_size, cache_pool_timeout)
        self.cached_collections = {}
        for collection in aslist(cached_collections):
            colmgr = CachedManager(self, collection)
            self.cached_collections[collection] = colmgr
        self.cache_only_collections = {}
        for collection in aslist(cache_only_collections):
            colmgr = CacheOnlyManager(self, collection)
            self.cache_only_collections[collection] = colmgr
        self.cache_lock = cache_lock
        if cache_lock_ttl is None:
            self.cache_lock_ttl = DEFAULT_CACHE_LOCK_TTL
        else:
            self.cache_lock_ttl = cache_lock_ttl
        # Keep a threadlocal to track the currently-held locks.
        # This is needed to make the read locking API reentrant.
        self._tldata = threading.local()

    def iter_cache_keys(self, userid):
        """Iterator over all potential cache keys for the given userid.

        This method yields all potential cache keys for the given userid,
        including their metadata key and the keys for any cached collections.
        The yielded keys do *not* include the key prefix, if any.
        """
        yield _key(userid, "metadata")
        for colmgr in self.cached_collections.itervalues():
            yield colmgr.get_key(userid)
        for colmgr in self.cache_only_collections.itervalues():
            yield colmgr.get_key(userid)

    def _get_collection_manager(self, collection):
        """Get a collection-management object for the named collection.

        This class delegates all collection-level operations to a
        "collection manager" object.  The manager for a given collection
        will be different depending on the required caching characteristics,
        and this method gets and returns an appropriate manager for the
        named collection.
        """
        try:
            return self.cached_collections[collection]
        except KeyError:
            try:
                return self.cache_only_collections[collection]
            except KeyError:
                return UncachedManager(self, collection)

    #
    # APIs for collection-level locking.
    #
    # This class provides the option of locking at the memcache level rather
    # than calling through to the underlying storage engine.  Such locks
    # are just simple mutex keys in memcache, one per collection.  If you
    # can successfully add the key then you get the lock, if it already
    # exists then someone else holds the lock.  If you crash while holding
    # the lock, it will eventually expire.
    #

    def lock_for_read(self, userid, collection):
        """Acquire a shared read lock on the named collection."""
        if self.cache_lock or collection in self.cache_only_collections:
            return self._lock_in_memcache(userid, collection)
        else:
            return self.storage.lock_for_read(userid, collection)

    def lock_for_write(self, userid, collection):
        """Acquire an exclusive write lock on the named collection."""
        if self.cache_lock or collection in self.cache_only_collections:
            return self._lock_in_memcache(userid, collection)
        else:
            return self.storage.lock_for_write(userid, collection)

    @contextlib.contextmanager
    def _lock_in_memcache(self, userid, collection):
        """Helper method to take a memcache-level lock on a collection."""
        # Use a thread-local set of held locks to make this reentrant.
        try:
            locked_collections = self._tldata.locked_collections
        except AttributeError:
            locked_collections = self._tldata.locked_collections = set()
        if (userid, collection) in locked_collections:
            yield None
            return
        # Take the lock in memcached.
        ttl = self.cache_lock_ttl
        now = time.time()
        key = _key(userid, "lock", collection)
        if not self.cache.add(key, True, time=ttl):
            raise ConflictError
        locked_collections.add((userid, collection))
        try:
            yield None
        finally:
            locked_collections.remove((userid, collection))
            if time.time() - now >= ttl:
                msg = "Lock expired while we were holding it"
                raise RuntimeError(msg)
            self.cache.delete(key)

    #
    # APIs to operate on the entire storage.
    #

    def get_storage_timestamp(self, userid):
        """Returns the last-modified timestamp for the entire storage."""
        # Try to use the cached value.
        ts = self._get_metadata(userid)["modified"]
        # Fall back to live data if it's dirty.
        if ts is None:
            ts = self.storage.get_storage_timestamp(userid)
            for colmgr in self.cache_only_collections.itervalues():
                try:
                    ts = max(ts, colmgr.get_timestamp(userid))
                except CollectionNotFoundError:
                    pass
        return ts

    def get_collection_timestamps(self, userid):
        """Returns the collection timestamps for a user."""
        # Try to use the cached value.
        timestamps = self._get_metadata(userid)["collections"]
        # Fall back to live data for any collections that are dirty.
        for collection, ts in timestamps.items():
            if ts is None:
                colmgr = self._get_collection_manager(collection)
                try:
                    timestamps[collection] = colmgr.get_timestamp(userid)
                except CollectionNotFoundError:
                    del timestamps[collection]
        return timestamps

    def get_collection_counts(self, userid):
        """Returns the collection counts."""
        # Read most of the data from the database.
        counts = self.storage.get_collection_counts(userid)
        # Add in counts for collections stored only in memcache.
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(userid)["items"]
            except CollectionNotFoundError:
                pass
            else:
                counts[colmgr.collection] = len(items)
        return counts

    def get_collection_sizes(self, userid):
        """Returns the total size for each collection."""
        # Read most of the data from the database.
        sizes = self.storage.get_collection_sizes(userid)
        # Add in sizes for collections stored only in memcache.
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(userid)["items"]
                payloads = (item.get("payload", "") for item in items)
                sizes[colmgr.collection] = sum(len(p) for p in payloads)
            except CollectionNotFoundError:
                pass
        # Since we've just gone to the trouble of recalculating sizes,
        # we might as well update the cached total size as well.
        self._update_total_size(userid, sum(sizes.itervalues()))
        return sizes

    def get_total_size(self, userid, recalculate=False):
        """Returns the total size of a user's storage data."""
        return self._get_metadata(userid, recalculate)["size"]

    def delete_storage(self, userid):
        """Removes all data for the user."""
        for key in self.iter_cache_keys(userid):
            self.cache.delete(key)
        self.storage.delete_storage(userid)

    #
    # APIs to operate on an individual collection
    #

    def get_collection_timestamp(self, userid, collection):
        """Returns the last-modified timestamp for the named collection."""
        # It's likely cheaper to read all cached timestamps out of memcache
        # than to read just the single timestamp from the database.
        timestamps = self.get_collection_timestamps(userid)
        try:
            ts = timestamps[collection]
        except KeyError:
            raise CollectionNotFoundError
        # Refresh from the live data if dirty.
        if ts is None:
            colmgr = self._get_collection_manager(collection)
            ts = colmgr.get_timestamp(userid)
        return ts

    def get_items(self, userid, collection, **kwds):
        """Returns items from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_items(userid, **kwds)

    def get_item_ids(self, userid, collection, **kwds):
        """Returns item ids from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item_ids(userid, **kwds)

    def set_items(self, userid, collection, items):
        """Creates or updates multiple items in a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(userid, collection) as update:
            ts = colmgr.set_items(userid, items)
            size = sum(len(item.get("payload", "")) for item in items)
            update(ts, ts, size)
            return ts

    def delete_collection(self, userid, collection):
        """Deletes an entire collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(userid, collection) as update:
            ts = colmgr.del_collection(userid)
            update(ts, None)
            return ts

    def delete_items(self, userid, collection, items):
        """Deletes multiple items from a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(userid, collection) as update:
            ts = colmgr.del_items(userid, items)
            update(ts, ts)
            return ts

    #
    # Items APIs
    #

    def get_item_timestamp(self, userid, collection, item):
        """Returns the last-modified timestamp for the named item."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item_timestamp(userid, item)

    def get_item(self, userid, collection, item):
        """Returns one item from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item(userid, item)

    def set_item(self, userid, collection, item, data):
        """Creates or updates a single item in a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(userid, collection) as update:
            res = colmgr.set_item(userid, item, data)
            size = len(data.get("payload", ""))
            update(res["modified"], res["modified"], size)
            return res

    def delete_item(self, userid, collection, item):
        """Deletes a single item from a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(userid, collection) as update:
            ts = colmgr.del_item(userid, item)
            update(ts, ts)
            return ts

    #
    # Administrative/maintenance methods.
    #

    def purge_expired_items(self, grace_period=0, max_per_loop=1000):
        """Purges items with an expired TTL from the database."""
        # We have no way to purge expired items from memcached, as
        # there's no way to enumerate all the userids.  Purging is
        # instead done on each write for cached collections, with the
        # expectation that this will be cheap due to low item count.
        # Therefore, the only thing we can do here is pass on the call.
        return self.storage.purge_expired_items(grace_period, max_per_loop)

    #
    # Private APIs for managing the cached metadata
    #

    def _get_metadata(self, userid, recalculate_size=False):
        """Get the metadata dict, recalculating things if necessary.

        This method pulls the dict of metadata out of memcache and returns
        it.  If there is no information yet in memcache then it pulls the
        data from the underlying storage, caches it and then returns it.

        If recalculate_size is given and True, then the cache size value
        will be recalculated from the store if it is more than an hour old.
        """
        key = _key(userid, "metadata")
        data, casid = self.cache.gets(key)
        # If there is no cached metadata, initialize it from the storage.
        # Use CAS to avoid overwriting other changes, but don't error out if
        # the write fails - it just means that someone else beat us to it.
        if data is None:
            # Get the mapping of collection names to timestamps.
            # Make sure to include any cache-only collections.
            timestamps = self.storage.get_collection_timestamps(userid)
            for colmgr in self.cached_collections.itervalues():
                if colmgr.collection not in timestamps:
                    try:
                        ts = colmgr.get_timestamp(userid)
                        timestamps[colmgr.collection] = ts
                    except CollectionNotFoundError:
                        pass
            # Get the storage-level modified time.
            # Make sure it's not less than any collection-level timestamp.
            ts = self.storage.get_storage_timestamp(userid)
            if timestamps:
                ts = max(ts, max(timestamps.itervalues()))
            # Calculate the total size if requested,
            # but don't bother if it's not necessary.
            if not recalculate_size:
                last_size_recalc = 0
                size = 0
            else:
                last_size_recalc = int(time.time())
                size = self._recalculate_total_size(userid)
            # Store it all back into the cache.
            data = {
                "size": size,
                "last_size_recalc": last_size_recalc,
                "modified": ts,
                "collections": timestamps,
            }
            self.cache.cas(key, data, casid)
        # Recalculate the size if it appears to be out of date.
        # Use CAS to avoid clobbering changes but don't let it fail us.
        elif recalculate_size:
            recalc_period = time.time() - data["last_size_recalc"]
            if recalc_period > SIZE_RECALCULATION_PERIOD:
                data["last_size_recalc"] = int(time.time())
                data["size"] = self._recalculate_total_size(userid)
                self.cache.cas(key, data, casid)
        return data

    def _update_total_size(self, userid, size):
        """Update the cached value for total storage size."""
        key = _key(userid, "metadata")
        data, casid = self.cache.gets(key)
        if data is None:
            self._get_metadata(userid)
            data, casid = self.cache.gets(key)
        data["last_size_recalc"] = int(time.time())
        data["size"] = size
        self.cache.cas(key, data, casid)

    def _recalculate_total_size(self, userid):
        """Re-calculate total size from the database."""
        size = self.storage.get_total_size(userid)
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(userid)["items"]
                payloads = (item.get("payload", "") for item in items)
                size += sum(len(p) for p in payloads)
            except CollectionNotFoundError:
                pass
        return size

    @contextlib.contextmanager
    def _mark_collection_dirty(self, userid, collection):
        """Context manager for marking collections as dirty during write.

        To prevent the cache from getting out of sync with the underlying
        store it is necessary to mark a collection as dirty before performing
        any modifications on it.
        This is a handy context manager that can take care of that, as well
        as update the timestamps with new results when the modification is
        complete.

        The context object associated with this method is a callback function
        that can be used to update the stored metadata.  It accepts the top-
        level storage timestamp, collection-level timestamp, and a total size
        increment as its three arguments.  Example usage::

            with self._mark_collection_dirty(userid, collection) as update:
                colobj = self._get_collection_manager(collection)
                ts = colobj.set_item(userid, "test", {"payload": "TEST"})
                update(ts, ts, len("TEST"))

        """
        # Get the old values from the metadata.
        # We can't call _get_metadata directly because we also want the casid.
        key = _key(userid, "metadata")
        data, casid = self.cache.gets(key)
        if data is None:
            # No cached data, so refresh.
            self._get_metadata(userid)
            data, casid = self.cache.gets(key)
        # Write None into the metadata to mark things as dirty.
        ts = data["modified"]
        col_ts = data["collections"].get(collection)
        data["modified"] = None
        data["collections"][collection] = None
        if not self.cache.cas(key, data, casid):
            raise ConflictError
        # Define the callback function for the calling code to use.
        # We also use this function internally to recover from errors.
        update_was_called = []

        def update(ts=ts, col_ts=col_ts, size_incr=0):
            assert not update_was_called
            update_was_called.append(True)
            data["modified"] = ts
            if col_ts is None:
                del data["collections"][collection]
            else:
                data["collections"][collection] = col_ts
            data["size"] += size_incr
            # We assume the write lock is held to avoid conflicting changes.
            # Sadly, using CAS again would require another round-trip.
            self.cache.set(key, data)

        # Yield out to the calling code.
        # It can call the yielded function to provide new metadata.
        # If they don't call it, then we cannot make any assumptions about
        # the consistency of the cached data and must leave things marked
        # as dirty until another write cleans it up.
        try:
            yield update
        except StorageError:
            # If a storage-related error occurs, then we know that the
            # operation wrapped by the calling code did not succeed.
            # It's therefore safe to roll back to previous values.
            if not update_was_called:
                update()
            raise
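
# Example usage sketch for the caching wrapper above.  The server address,
# key prefix and collection names here are illustrative assumptions; any
# SyncStorage-compatible object can be passed as `backend`.
def _example_wrap_storage(backend):
    return MemcachedStorage(
        storage=backend,
        cache_servers=["127.0.0.1:11211"],
        cache_key_prefix="sync:",
        # Collections mirrored into memcache for fast reads.
        cached_collections=["meta", "clients"],
        # Collections that live only in memcache, never hitting the backend.
        cache_only_collections=["tabs"],
        # Take collection locks in memcache instead of in the backend.
        cache_lock=True,
    )
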

class MemcachedStorage(SyncStorage):
    """Memcached caching wrapper for SyncStorage backends.

    The SyncStorage implementation wraps another storage backend to provide
    a caching layer.  You may specify the following arguments:

        * storage: the underlying SyncStorage object that is to be wrapped.
        * cache_servers: a list of memcached server URLs.
        * cached_collections: a list of names of collections that should
          be duplicated into memcache for fast access.
        * cache_only_collections: a list of names of collections that should
          be stored *only* in memcached, and never written through to the
          backend.
        * cache_key_prefix: a string to be prepended to all memcached keys,
          useful for namespacing in shared cache setups.
        * cache_pool_size: the maximum number of active memcache clients.
        * cache_pool_timeout: the maximum lifetime of each memcache client.

    """

    def __init__(self, storage, cache_servers=None, cache_key_prefix="",
                 cache_pool_size=None, cache_pool_timeout=60,
                 cached_collections=(), cache_only_collections=(),
                 cache_lock=False, cache_lock_ttl=None, **kwds):
        self.storage = storage
        self.cache = MemcachedClient(cache_servers, cache_key_prefix,
                                     cache_pool_size, cache_pool_timeout)
        self.cached_collections = {}
        for collection in aslist(cached_collections):
            colmgr = CachedManager(self, collection)
            self.cached_collections[collection] = colmgr
        self.cache_only_collections = {}
        for collection in aslist(cache_only_collections):
            colmgr = CacheOnlyManager(self, collection)
            self.cache_only_collections[collection] = colmgr
        self.cache_lock = cache_lock
        if cache_lock_ttl is None:
            self.cache_lock_ttl = DEFAULT_CACHE_LOCK_TTL
        else:
            self.cache_lock_ttl = cache_lock_ttl
        # Keep a threadlocal to track the currently-held locks.
        # This is needed to make the read locking API reentrant.
        self._tldata = threading.local()

    def iter_cache_keys(self, user):
        """Iterator over all potential cache keys for the given user.

        This method yields all potential cache keys for the given user,
        including their metadata key and the keys for any cached collections.
        The yielded keys do *not* include the key prefix, if any.
        """
        yield _key(user["uid"], "metadata")
        for colmgr in self.cached_collections.itervalues():
            for key in colmgr.iter_cache_keys(user):
                yield key
        for colmgr in self.cache_only_collections.itervalues():
            for key in colmgr.iter_cache_keys(user):
                yield key

    def _get_collection_manager(self, collection):
        """Get a collection-management object for the named collection.

        This class delegates all collection-level operations to a
        "collection manager" object.  The manager for a given collection
        will be different depending on the required caching characteristics,
        and this method gets and returns an appropriate manager for the
        named collection.
        """
        try:
            return self.cached_collections[collection]
        except KeyError:
            try:
                return self.cache_only_collections[collection]
            except KeyError:
                return UncachedManager(self, collection)

    #
    # APIs for collection-level locking.
    #
    # This class provides the option of locking at the memcache level rather
    # than calling through to the underlying storage engine.  Such locks
    # are just simple mutex keys in memcache, one per collection.  If you
    # can successfully add the key then you get the lock, if it already
    # exists then someone else holds the lock.  If you crash while holding
    # the lock, it will eventually expire.
    #

    def lock_for_read(self, user, collection):
        """Acquire a shared read lock on the named collection."""
        if self.cache_lock or collection in self.cache_only_collections:
            return self._lock_in_memcache(user, collection)
        else:
            return self.storage.lock_for_read(user, collection)

    def lock_for_write(self, user, collection):
        """Acquire an exclusive write lock on the named collection."""
        if self.cache_lock or collection in self.cache_only_collections:
            return self._lock_in_memcache(user, collection)
        else:
            return self.storage.lock_for_write(user, collection)

    @contextlib.contextmanager
    def _lock_in_memcache(self, user, collection):
        """Helper method to take a memcache-level lock on a collection."""
        userid = user["uid"]
        # Use a thread-local set of held locks to make this reentrant.
        try:
            locked_collections = self._tldata.locked_collections
        except AttributeError:
            locked_collections = self._tldata.locked_collections = set()
        if (userid, collection) in locked_collections:
            yield None
            return
        # Take the lock in memcached.
        ttl = self.cache_lock_ttl
        now = time.time()
        key = _key(userid, "lock", collection)
        if not self.cache.add(key, True, time=ttl):
            raise ConflictError
        locked_collections.add((userid, collection))
        try:
            yield None
        finally:
            locked_collections.remove((userid, collection))
            if time.time() - now >= ttl:
                msg = "Lock expired while we were holding it"
                raise RuntimeError(msg)
            self.cache.delete(key)

    #
    # APIs to operate on the entire storage.
    #

    def get_storage_timestamp(self, user):
        """Returns the last-modified timestamp for the entire storage."""
        # Try to use the cached value.
        ts = self._get_metadata(user)["modified"]
        # Fall back to live data if it's dirty.
        if ts is None:
            ts = self.storage.get_storage_timestamp(user)
            for colmgr in self.cache_only_collections.itervalues():
                try:
                    ts = max(ts, colmgr.get_timestamp(user))
                except CollectionNotFoundError:
                    pass
        return ts

    def get_collection_timestamps(self, user):
        """Returns the collection timestamps for a user."""
        # Try to use the cached value.
        timestamps = self._get_metadata(user)["collections"]
        # Fall back to live data for any collections that are dirty.
        for collection, ts in timestamps.items():
            if ts is None:
                colmgr = self._get_collection_manager(collection)
                try:
                    timestamps[collection] = colmgr.get_timestamp(user)
                except CollectionNotFoundError:
                    del timestamps[collection]
        return timestamps

    def get_collection_counts(self, user):
        """Returns the collection counts."""
        # Read most of the data from the database.
        counts = self.storage.get_collection_counts(user)
        # Add in counts for collections stored only in memcache.
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(user)["items"]
            except CollectionNotFoundError:
                pass
            else:
                counts[colmgr.collection] = len(items)
        return counts

    def get_collection_sizes(self, user):
        """Returns the total size for each collection."""
        # Read most of the data from the database.
        sizes = self.storage.get_collection_sizes(user)
        # Add in sizes for collections stored only in memcache.
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(user)["items"]
                payloads = (item.get("payload", "") for item in items)
                sizes[colmgr.collection] = sum(len(p) for p in payloads)
            except CollectionNotFoundError:
                pass
        # Since we've just gone to the trouble of recalculating sizes,
        # we might as well update the cached total size as well.
        self._update_total_size(user, sum(sizes.itervalues()))
        return sizes

    def get_total_size(self, user, recalculate=False):
        """Returns the total size of a user's storage data."""
        return self._get_metadata(user, recalculate)["size"]

    def delete_storage(self, user):
        """Removes all data for the user."""
        for key in self.iter_cache_keys(user):
            self.cache.delete(key)
        self.storage.delete_storage(user)

    #
    # APIs to operate on an individual collection
    #

    def get_collection_timestamp(self, user, collection):
        """Returns the last-modified timestamp for the named collection."""
        # It's likely cheaper to read all cached timestamps out of memcache
        # than to read just the single timestamp from the database.
        timestamps = self.get_collection_timestamps(user)
        try:
            ts = timestamps[collection]
        except KeyError:
            raise CollectionNotFoundError
        # Refresh from the live data if dirty.
        if ts is None:
            colmgr = self._get_collection_manager(collection)
            ts = colmgr.get_timestamp(user)
        return ts

    def get_items(self, user, collection, **kwds):
        """Returns items from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_items(user, **kwds)

    def get_item_ids(self, user, collection, **kwds):
        """Returns item ids from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item_ids(user, **kwds)

    def set_items(self, user, collection, items):
        """Creates or updates multiple items in a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.set_items(user, items)
            size = sum(len(item.get("payload", "")) for item in items)
            update(ts, ts, size)
            return ts

    def delete_collection(self, user, collection):
        """Deletes an entire collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.del_collection(user)
            update(ts, None)
            return ts

    def delete_items(self, user, collection, items):
        """Deletes multiple items from a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.del_items(user, items)
            update(ts, ts)
            return ts

    def create_batch(self, user, collection):
        """Creates a batch for a given user's collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.create_batch(user)

    def valid_batch(self, user, collection, batchid):
        """Verifies that a batch ID is valid."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.valid_batch(user, batchid)

    def append_items_to_batch(self, user, collection, batchid, items):
        """Appends items to the pending batch."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.append_items_to_batch(user, batchid, items)
            # Account for the size of the new items as they come in,
            # since that's the only opportunity we have to see them.
            # Don't update the timestamp yet though, as they're not committed.
            size = sum(len(item.get("payload", "")) for item in items)
            update(size_incr=size)
            return ts

    def apply_batch(self, user, collection, batchid):
        """Applies the batch."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.apply_batch(user, batchid)
            update(ts, ts)
            return ts

    def close_batch(self, user, collection, batchid):
        colmgr = self._get_collection_manager(collection)
        return colmgr.close_batch(user, batchid)

    #
    # Items APIs
    #

    def get_item_timestamp(self, user, collection, item):
        """Returns the last-modified timestamp for the named item."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item_timestamp(user, item)

    def get_item(self, user, collection, item):
        """Returns one item from a collection."""
        colmgr = self._get_collection_manager(collection)
        return colmgr.get_item(user, item)

    def set_item(self, user, collection, item, data):
        """Creates or updates a single item in a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            res = colmgr.set_item(user, item, data)
            size = len(data.get("payload", ""))
            update(res["modified"], res["modified"], size)
            return res

    def delete_item(self, user, collection, item):
        """Deletes a single item from a collection."""
        colmgr = self._get_collection_manager(collection)
        with self._mark_collection_dirty(user, collection) as update:
            ts = colmgr.del_item(user, item)
            update(ts, ts)
            return ts

    def is_migrating(self, user):
        """Is the user in migration?"""
        return self.storage.is_migrating(user)

    #
    # Administrative/maintenance methods.
    #

    def purge_expired_items(self, grace_period=0, max_per_loop=1000):
        """Purges items with an expired TTL from the database."""
        # We have no way to purge expired items from memcached, as
        # there's no way to enumerate all the userids.  Purging is
        # instead done on each write for cached collections, with the
        # expectation that this will be cheap due to low item count.
        # Therefore, the only thing we can do here is pass on the call.
        return self.storage.purge_expired_items(grace_period, max_per_loop)

    #
    # Private APIs for managing the cached metadata
    #

    def _get_metadata(self, user, recalculate_size=False):
        """Get the metadata dict, recalculating things if necessary.

        This method pulls the dict of metadata out of memcache and returns
        it.  If there is no information yet in memcache then it pulls the
        data from the underlying storage, caches it and then returns it.

        If recalculate_size is given and True, then the cache size value
        will be recalculated from the store if it is more than an hour old.
        """
        key = _key(user["uid"], "metadata")
        data, casid = self.cache.gets(key)
        # If there is no cached metadata, initialize it from the storage.
        # Use CAS to avoid overwriting other changes, but don't error out if
        # the write fails - it just means that someone else beat us to it.
        if data is None:
            # Get the mapping of collection names to timestamps.
            # Make sure to include any cache-only collections.
            timestamps = self.storage.get_collection_timestamps(user)
            for colmgr in self.cached_collections.itervalues():
                if colmgr.collection not in timestamps:
                    try:
                        ts = colmgr.get_timestamp(user)
                        timestamps[colmgr.collection] = ts
                    except CollectionNotFoundError:
                        pass
            # Get the storage-level modified time.
            # Make sure it's not less than any collection-level timestamp.
            ts = self.storage.get_storage_timestamp(user)
            if timestamps:
                ts = max(ts, max(timestamps.itervalues()))
            # Calculate the total size if requested,
            # but don't bother if it's not necessary.
            if not recalculate_size:
                last_size_recalc = 0
                size = 0
            else:
                last_size_recalc = int(time.time())
                size = self._recalculate_total_size(user)
            # Store it all back into the cache.
            data = {
                "size": size,
                "last_size_recalc": last_size_recalc,
                "modified": ts,
                "collections": timestamps,
            }
            self.cache.cas(key, data, casid)
        # Recalculate the size if it appears to be out of date.
        # Use CAS to avoid clobbering changes but don't let it fail us.
        elif recalculate_size:
            recalc_period = time.time() - data["last_size_recalc"]
            if recalc_period > SIZE_RECALCULATION_PERIOD:
                data["last_size_recalc"] = int(time.time())
                data["size"] = self._recalculate_total_size(user)
                self.cache.cas(key, data, casid)
        return data

    def _update_total_size(self, user, size):
        """Update the cached value for total storage size."""
        key = _key(user["uid"], "metadata")
        data, casid = self.cache.gets(key)
        if data is None:
            self._get_metadata(user)
            data, casid = self.cache.gets(key)
        data["last_size_recalc"] = int(time.time())
        data["size"] = size
        self.cache.cas(key, data, casid)

    def _recalculate_total_size(self, user):
        """Re-calculate total size from the database."""
        size = self.storage.get_total_size(user)
        for colmgr in self.cache_only_collections.itervalues():
            try:
                items = colmgr.get_items(user)["items"]
                payloads = (item.get("payload", "") for item in items)
                size += sum(len(p) for p in payloads)
            except CollectionNotFoundError:
                pass
        return size

    @contextlib.contextmanager
    def _mark_collection_dirty(self, user, collection):
        """Context manager for marking collections as dirty during write.

        To prevent the cache from getting out of sync with the underlying
        store it is necessary to mark a collection as dirty before performing
        any modifications on it.  This is a handy context manager that can
        take care of that, as well as update the timestamps with new results
        when the modification is complete.

        The context object associated with this method is a callback function
        that can be used to update the stored metadata.  It accepts the top-
        level storage timestamp, collection-level timestamp, and a total size
        increment as its three arguments.  Example usage::

            with self._mark_collection_dirty(user, collection) as update:
                colobj = self._get_collection_manager(collection)
                ts = colobj.set_item(user, "test", {"payload": "TEST"})
                update(ts, ts, len("TEST"))

        """
        # Get the old values from the metadata.
        # We can't call _get_metadata directly because we also want the casid.
        key = _key(user["uid"], "metadata")
        data, casid = self.cache.gets(key)
        if data is None:
            # No cached data, so refresh.
            self._get_metadata(user)
            data, casid = self.cache.gets(key)
        # Write None into the metadata to mark things as dirty.
        ts = data["modified"]
        col_ts = data["collections"].get(collection)
        data["modified"] = None
        data["collections"][collection] = None
        if not self.cache.cas(key, data, casid):
            raise ConflictError
        # Define the callback function for the calling code to use.
        # We also use this function internally to recover from errors.
        update_was_called = []

        def update(ts=ts, col_ts=col_ts, size_incr=0):
            assert not update_was_called
            update_was_called.append(True)
            data["modified"] = ts
            if col_ts is None:
                del data["collections"][collection]
            else:
                data["collections"][collection] = col_ts
            data["size"] += size_incr
            # We assume the write lock is held to avoid conflicting changes.
            # Sadly, using CAS again would require another round-trip.
            self.cache.set(key, data)

        # Yield out to the calling code.
        # It can call the yielded function to provide new metadata.
        # If they don't call it, then we cannot make any assumptions about
        # the consistency of the cached data and must leave things marked
        # as dirty until another write cleans it up.
        try:
            yield update
        except StorageError:
            # If a storage-related error occurs, then we know that the
            # operation wrapped by the calling code did not succeed.
            # It's therefore safe to roll back to previous values.
            if not update_was_called:
                update()
            raise
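
# Example usage sketch for the batch-upload APIs above.  The collection
# name and the shape of `user` and `items` here are illustrative assumptions.
def _example_batch_upload(storage, user, items):
    collection = "bookmarks"
    # Open a batch, append items to it (possibly over several requests),
    # then commit it in one go so the collection timestamp advances once.
    batchid = storage.create_batch(user, collection)
    assert storage.valid_batch(user, collection, batchid)
    storage.append_items_to_batch(user, collection, batchid, items)
    ts = storage.apply_batch(user, collection, batchid)
    storage.close_batch(user, collection, batchid)
    return ts
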

class MemcachedNonceCache(object):
    """Object for managing a cache of used nonce values in memcached.

    This class allows easy timestamp-based management of client-generated
    nonces according to the rules of RFC-TODO:

        * Maintain a measure of clock skew for each MAC id.
        * Reject nonces with a timestamp outside the configured range.
        * Reject nonces that have already been seen.

    It stores the nonces in memcached so that they can be shared between
    different webserver processes.  The clock skew for each id token is
    stored under the key sha1(<id>:skew), while each nonce seen for the id
    is marked by a key sha1(<id>:nonce:<timestamp>:<nonce>).

    NOTE: the "MAC id" here corresponds to the full authentication token
    issued by the tokenserver, not to the numeric userid of an individual
    user.  So it is entirely possible to have multiple skew records for
    each user, corresponding to different active tokens.
    """

    def __init__(self, nonce_ttl=None, id_ttl=None, cache_servers=None,
                 cache_key_prefix="noncecache:", cache_pool_size=None,
                 cache_pool_timeout=60, **kwds):
        # Memcached ttls are in integer seconds, so round up to the nearest.
        if nonce_ttl is None:
            nonce_ttl = DEFAULT_NONCE_TTL
        else:
            nonce_ttl = int(math.ceil(nonce_ttl))
        if id_ttl is None:
            id_ttl = DEFAULT_ID_TTL
        else:
            id_ttl = int(math.ceil(id_ttl))
        self.nonce_ttl = nonce_ttl
        self.id_ttl = id_ttl
        self.mcclient = MemcachedClient(cache_servers, cache_key_prefix,
                                        cache_pool_size, cache_pool_timeout)

    def _key(self, *names):
        """Get a memcached key built from the given component names.

        This method returns the memcached key to use for the given component
        names, by concatenating them together and then hashing them.  The
        hashing serves both to ensure confidentiality of the macauth tokens
        stored in memcached, and to reduce the size of the keys.
        """
        return urlsafe_b64encode(sha1(":".join(names)).digest())

    def check_nonce(self, id, timestamp, nonce):
        """Check if the given timestamp+nonce is fresh for the given id.

        This method checks that the given timestamp+nonce has not previously
        been seen for the given id.  It returns True if the nonce is fresh
        and False if not.

        Fresh nonces are added to the cache, so that subsequent checks of the
        same nonce will return False.
        """
        # We want to fetch the recorded clock skew for this id, along with
        # any existing cache entry for the provided nonce.
        key_skew = self._key(id, "skew")
        key_nonce = self._key(id, "nonce", str(timestamp), nonce)
        # Use get_multi to fetch both keys in a single request.
        # If the data appears to be corrupted then fail out for safety.
        try:
            cached = self.mcclient.get_multi([key_skew, key_nonce])
        except ValueError:
            return False
        # If the nonce appears in the cache, it must be stale.
        if key_nonce in cached:
            return False
        # If we've never recorded a clock skew for this id, record it now.
        try:
            skew = cached[key_skew]
        except KeyError:
            skew = int(time.time() - timestamp)
            self.mcclient.add(key_skew, skew, time=self.id_ttl)
        # If the adjusted timestamp is too old or too new, it is stale.
        # XXX TODO: we should use a monotonic clock here.
        if abs(timestamp + skew - time.time()) >= self.nonce_ttl:
            return False
        # The nonce is fresh, add it into the cache.
        self.mcclient.add(key_nonce, True, time=self.nonce_ttl)
        return True
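
# Example usage sketch for the skew-aware nonce cache above, for a client
# whose clock runs behind the server.  The ttl values, token id and nonce
# strings here are illustrative assumptions.
def _example_skewed_client(cache_servers=None):
    cache = MemcachedNonceCache(nonce_ttl=30, id_ttl=3600,
                                cache_servers=cache_servers)
    token_id = "example-mac-id"
    # The client clock is five minutes slow; the first check records the skew.
    client_ts = int(time.time()) - 300
    assert cache.check_nonce(token_id, client_ts, "nonce-1")
    # Later nonces from the same token are judged against the recorded skew,
    # so they are accepted as long as they arrive within nonce_ttl seconds.
    assert cache.check_nonce(token_id, client_ts + 1, "nonce-2")
    # Replaying an already-seen nonce is always rejected.
    assert not cache.check_nonce(token_id, client_ts, "nonce-1")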