import logging

from collections import deque
from datetime import timedelta
from threading import Timer
from typing import Optional

# NOTE The project-internal names used below (CookieJar, Cookie, Enrichment,
# EnrichmentCollection, Sofabed, _Bert, _Ernie and CountingLock) are assumed
# to be imported from elsewhere in the package


class BiscuitTin(CookieJar):
    """ Persistent implementation of `CookieJar` """
    def __init__(self, couchdb_url:str, couchdb_name:str, buffer_capacity:int = 1000,
                                                          buffer_latency:timedelta = timedelta(milliseconds=50),
                                                          **kwargs):
        """
        Constructor: Initialise the database interfaces

        @param  couchdb_url      CouchDB URL
        @param  couchdb_name     Database name
        @param  buffer_capacity  Buffer capacity
        @param  buffer_latency   Buffer latency
        """
        super().__init__()
        self._sofa = Sofabed(couchdb_url, couchdb_name, buffer_capacity, buffer_latency, **kwargs)
        self._queue = _Bert(self._sofa)
        self._metadata = _Ernie(self._sofa)

        self._queue_lock = CountingLock()
        self._pending_cache = deque()

        self._latency = buffer_latency.total_seconds()
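
    # Construction sketch (hypothetical URL and database name; the CouchDB
    # instance must already be reachable):
    #
    #     jar = BiscuitTin("http://localhost:5984", "cookies",
    #                      buffer_capacity=1000,
    #                      buffer_latency=timedelta(milliseconds=50))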

    def _broadcast(self):
        """
        Broadcast to all listeners
        This should be called on queue changes
        """
        self.notify_listeners()

    def _get_cookie(self, identifier: str) -> Optional[Cookie]:
        """
        This method *actually* fetches the Cookie, but is not targeted
        by the rate limiter
        """
        _, doc = self._queue.get_by_identifier(identifier) or (None, None)

        if doc is None:
            return None

        cookie = Cookie(identifier)
        cookie.enrichments = EnrichmentCollection(self._metadata.get_metadata(identifier))

        return cookie

    def fetch_cookie(self, identifier: str) -> Optional[Cookie]:
        """ Fetch the Cookie with the given identifier; public counterpart to `_get_cookie` """
        return self._get_cookie(identifier)

    def delete_cookie(self, identifier: str):
        self._metadata.delete_metadata(identifier)
        self._queue.delete(identifier)

    def enrich_cookie(self, identifier: str, enrichment: Enrichment, mark_for_processing: bool=True):
        self._metadata.enrich(identifier, enrichment)
        if mark_for_processing:
            self._queue.mark_dirty(identifier)
            self._broadcast()
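
    # Usage sketch (hypothetical identifier; `enrichment` is assumed to be an
    # already-constructed Enrichment instance):
    #
    #     jar.enrich_cookie("abc123", enrichment)                             # enrich and requeue
    #     jar.enrich_cookie("abc123", enrichment, mark_for_processing=False)  # enrich only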

    def mark_as_failed(self, identifier: str, requeue_delay: timedelta=timedelta(0)):
        self._queue.mark_finished(identifier)
        self._queue.mark_dirty(identifier, requeue_delay)
        logging.debug('%s has been marked as failed', identifier)

        # Broadcast the change after the requeue delay
        # FIXME? Timer's interval may not be 100% accurate and may also
        # not correspond with the database server; this could go out of
        # synch... Add a tolerance??
        Timer(requeue_delay.total_seconds(), self._broadcast).start()
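
    # Usage sketch (hypothetical identifier): requeue a failed Cookie after
    # five minutes; listeners are only notified once that delay has elapsed:
    #
    #     jar.mark_as_failed("abc123", requeue_delay=timedelta(minutes=5))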

    def mark_as_complete(self, identifier: str):
        self._queue.mark_finished(identifier)
        logging.debug('%s has been marked as complete', identifier)

    def mark_for_processing(self, identifier: str):
        self._queue.mark_dirty(identifier)

        self._broadcast()

    def get_next_for_processing(self) -> Optional[Cookie]:
        with self._queue_lock:
            if not self._pending_cache:
                # Dequeue up to as many Cookies (IDs) as there are
                # waiting threads, plus one for the current thread
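                # NOTE `CountingLock` is assumed to be a mutex that also
                # exposes, via `waiting_to_acquire()`, how many threads are
                # currently blocked trying to acquire it; that count sets the
                # batch size for the dequeue below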
                waiting = self._queue_lock.waiting_to_acquire()
                logging.debug('Fetching up to %d cookies for processing...', waiting + 1)
                to_process = self._queue.dequeue(waiting + 1)

                if not to_process:
                    return None

                self._pending_cache.extend([
                    self._get_cookie(doc_id)
                    for doc_id in to_process
                ])

            return self._pending_cache.popleft()

    def queue_length(self) -> int:
        return self._queue.queue_length()
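

# The following is a minimal consumer-loop sketch, not part of BiscuitTin
# itself. It assumes that `Cookie` exposes the identifier it was constructed
# with as `cookie.identifier`, and `process` stands in for whatever rule or
# enrichment logic is applied to each Cookie; a real deployment would block on
# the jar's listener/notification mechanism rather than poll with a sleep.
def _worker_loop_sketch(jar: BiscuitTin, process) -> None:
    import time

    while True:
        cookie = jar.get_next_for_processing()

        if cookie is None:
            # Nothing is ready for processing; back off briefly before retrying
            time.sleep(1)
            continue

        try:
            process(cookie)
        except Exception:
            # Requeue the Cookie after a short delay on failure
            jar.mark_as_failed(cookie.identifier, requeue_delay=timedelta(seconds=30))
        else:
            jar.mark_as_complete(cookie.identifier)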