def __init__(self, couchdb_url:str, couchdb_name:str, buffer_capacity:int = 1000, buffer_latency:timedelta = timedelta(milliseconds=50), **kwargs):
    """
    Constructor: Set up the CouchDB connection and the queue/metadata
    interfaces layered on top of it, plus the in-process state

    @param couchdb_url CouchDB URL
    @param couchdb_name Database name
    @param buffer_capacity Buffer capacity
    @param buffer_latency Buffer latency
    """
    # NOTE(review): this def duplicates BiscuitTin.__init__ below and
    # sits at module level, where zero-argument super() cannot resolve;
    # presumably a copy/paste artifact — confirm and remove if so
    super().__init__()

    # A single database connection backs both the queue and the
    # metadata repository
    sofa = Sofabed(couchdb_url, couchdb_name, buffer_capacity, buffer_latency, **kwargs)
    self._sofa = sofa
    self._queue = _Bert(sofa)
    self._metadata = _Ernie(sofa)

    # In-process coordination state
    self._queue_lock = CountingLock()
    self._pending_cache = deque()
    self._latency = buffer_latency.total_seconds()
class BiscuitTin(CookieJar):
    """ Persistent implementation of `CookieJar` """
    def __init__(self, couchdb_url:str, couchdb_name:str, buffer_capacity:int = 1000, buffer_latency:timedelta = timedelta(milliseconds=50), **kwargs):
        """
        Constructor: Initialise the database interfaces

        @param couchdb_url CouchDB URL
        @param couchdb_name Database name
        @param buffer_capacity Buffer capacity
        @param buffer_latency Buffer latency
        """
        super().__init__()

        # One shared connection backs both the queue (_Bert) and the
        # metadata repository (_Ernie)
        self._sofa = Sofabed(couchdb_url, couchdb_name, buffer_capacity, buffer_latency, **kwargs)
        self._queue = _Bert(self._sofa)
        self._metadata = _Ernie(self._sofa)

        self._queue_lock = CountingLock()
        self._pending_cache = deque()
        self._latency = buffer_latency.total_seconds()

    def _broadcast(self):
        """
        Broadcast to all listeners

        This should be called on queue changes
        """
        self.notify_listeners()

    def _get_cookie(self, identifier: str) -> Optional[Cookie]:
        """
        This method *actually* fetches the Cookie, but is not targeted
        by the rate limiter

        @param identifier Cookie identifier
        @return The Cookie, with its enrichments attached
                (None, if no queue document exists for the identifier)
        """
        _, doc = self._queue.get_by_identifier(identifier) or (None, None)
        if doc is None:
            return None

        cookie = Cookie(identifier)
        cookie.enrichments = EnrichmentCollection(self._metadata.get_metadata(identifier))
        return cookie

    def fetch_cookie(self, identifier: str) -> Optional[Cookie]:
        """
        Fetch the Cookie with the given identifier

        @param identifier Cookie identifier
        @return The Cookie (None, if not found)
        """
        # Delegates to the un-rate-limited fetch
        return self._get_cookie(identifier)

    def delete_cookie(self, identifier: str):
        """
        Delete the metadata and queue entry for the given identifier

        @param identifier Cookie identifier
        """
        self._metadata.delete_metadata(identifier)
        self._queue.delete(identifier)

    def enrich_cookie(self, identifier: str, enrichment: Enrichment, mark_for_processing: bool=True):
        """
        Append an enrichment to the Cookie's metadata and, optionally,
        mark it dirty for (re)processing

        @param identifier Cookie identifier
        @param enrichment The enrichment to append
        @param mark_for_processing Whether to requeue the Cookie
        """
        self._metadata.enrich(identifier, enrichment)
        if mark_for_processing:
            self._queue.mark_dirty(identifier)
            # Queue changed, so tell the listeners
            self._broadcast()

    def mark_as_failed(self, identifier: str, requeue_delay: timedelta=timedelta(0)):
        """
        Mark the Cookie as failed: finish its current queue entry and
        mark it dirty again, to be requeued after the given delay

        @param identifier Cookie identifier
        @param requeue_delay Delay before the Cookie is requeued
        """
        self._queue.mark_finished(identifier)
        self._queue.mark_dirty(identifier, requeue_delay)
        logging.debug('%s has been marked as failed', identifier)

        # Broadcast the change after the requeue delay
        # FIXME? Timer's interval may not be 100% accurate and may also
        # not correspond with the database server; this could go out of
        # synch... Add a tolerance??
        Timer(requeue_delay.total_seconds(), self._broadcast).start()

    def mark_as_complete(self, identifier: str):
        """
        Mark the Cookie's queue entry as finished

        @param identifier Cookie identifier
        """
        self._queue.mark_finished(identifier)
        logging.debug('%s has been marked as complete', identifier)

    def mark_for_processing(self, identifier: str):
        """
        Mark the Cookie as dirty, i.e. queued for (re)processing, and
        notify the listeners of the queue change

        @param identifier Cookie identifier
        """
        self._queue.mark_dirty(identifier)
        self._broadcast()

    def get_next_for_processing(self) -> Optional[Cookie]:
        """
        Pop the next Cookie to be processed

        Dequeuing is batched: while holding the lock, enough identifiers
        are dequeued to also serve the threads currently waiting on the
        lock, and the fetched Cookies are parked in the pending cache

        @return The next Cookie (None, if the queue is empty)
        """
        with self._queue_lock:
            if not self._pending_cache:
                # Dequeue up to as many Cookies (IDs) as there are
                # waiting threads, plus one for the current thread
                waiting = self._queue_lock.waiting_to_acquire()
                logging.debug('Fetching up to %d cookies for processing...', waiting + 1)

                to_process = self._queue.dequeue(waiting + 1)
                if not to_process:
                    return None

                self._pending_cache.extend([
                    self._get_cookie(doc_id)
                    for doc_id in to_process
                ])

            # Serve from the cache, oldest first
            return self._pending_cache.popleft()

    def queue_length(self) -> int:
        """
        @return The number of Cookies currently queued for processing
        """
        return self._queue.queue_length()