def _get(self, name, project=None, fields={'m': 1, '_id': 0}):
    queue = self._collection.find_one(_get_scoped_query(name, project),
                                      fields=fields)
    if queue is None:
        raise exceptions.QueueDoesNotExist(name, project)

    return queue
def _get_counter(self, name, project=None):
    """Retrieves the current message counter value for a given queue.

    This helper is used to generate monotonic pagination
    markers that are saved as part of the message
    document.

    Note 1: Markers are scoped per-queue and so are *not*
        globally unique or globally ordered.

    Note 2: If two or more requests to this method are made
        in parallel, this method will return the same counter
        value. This is done intentionally so that the caller
        can detect a parallel message post, allowing it to
        mitigate race conditions between producer and
        observer clients.

    :param name: Name of the queue to which the counter is scoped
    :param project: Queue's project
    :returns: current message counter as an integer
    """

    doc = self._collection.find_one(_get_scoped_query(name, project),
                                    fields={'c.v': 1, '_id': 0})

    if doc is None:
        raise exceptions.QueueDoesNotExist(name, project)

    return doc['c']['v']
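
# NOTE: The following is a minimal, self-contained sketch (not part of
# the driver) of the marker scheme described in the docstring above.
# The in-memory 'seen_markers' set is a hypothetical stand-in for the
# unique index on the messages collection; all names here are for
# illustration only.


def _example_assign_markers(base_counter, messages, seen_markers):
    """Assign contiguous markers [base, base + n) to a batch.

    Raises ValueError when a marker is already taken, mimicking the
    DuplicateKeyError that a competing producer triggers in MongoDB
    when both producers read the same counter value in parallel.
    """
    markers = [base_counter + index for index, _ in enumerate(messages)]

    if any(marker in seen_markers for marker in markers):
        raise ValueError('marker collision: parallel post detected')

    seen_markers.update(markers)
    return markers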
def stats(self, name, project=None):
    if not self.exists(name, project=project):
        raise exceptions.QueueDoesNotExist(name, project)

    controller = self.driver.message_controller

    active = controller._count(name, project=project,
                               include_claimed=False)
    total = controller._count(name, project=project,
                              include_claimed=True)

    message_stats = {
        'claimed': total - active,
        'free': active,
        'total': total,
    }

    try:
        oldest = controller.first(name, project=project, sort=1)
        newest = controller.first(name, project=project, sort=-1)
    except exceptions.QueueIsEmpty:
        pass
    else:
        now = timeutils.utcnow_ts()
        message_stats['oldest'] = utils.stat_message(oldest, now)
        message_stats['newest'] = utils.stat_message(newest, now)

    return {'messages': message_stats}
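
# NOTE: For illustration only, a hypothetical caller unpacking the
# mapping returned by stats() above. The 'oldest' and 'newest' keys
# are set only when the queue is non-empty, hence the membership test.


def _example_summarize_stats(stats_doc):
    messages = stats_doc['messages']
    summary = ('claimed={0} free={1} total={2}'
               .format(messages['claimed'],
                       messages['free'],
                       messages['total']))

    if 'oldest' in messages:
        # Each of these is a mapping produced by utils.stat_message()
        summary += ' oldest={0!r} newest={1!r}'.format(
            messages['oldest'], messages['newest'])

    return summary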
def get_qid(driver, queue, project):
    try:
        return driver.get('''
            select id from Queues
             where project = ? and name = ?''',
                          project, queue)[0]

    except NoResult:
        raise exceptions.QueueDoesNotExist(queue, project)
def _inc_counter(self, name, project=None, amount=1, window=None):
    """Increments the message counter and returns the new value.

    :param name: Name of the queue to which the counter is scoped
    :param project: Queue's project name
    :param amount: (Default 1) Amount by which to increment the
        counter
    :param window: (Default None) A time window, in seconds, that
        must have elapsed since the counter was last updated, in
        order to increment the counter.
    :returns: Updated message counter value, or None if window
        was specified, and the counter has already been updated
        within the specified time period.
    :raises: storage.exceptions.QueueDoesNotExist
    """
    now = timeutils.utcnow_ts()

    update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
    query = _get_scoped_query(name, project)
    if window is not None:
        threshold = now - window
        query['c.t'] = {'$lt': threshold}

    while True:
        try:
            doc = self._collection.find_and_modify(
                query, update, new=True,
                fields={'c.v': 1, '_id': 0})

            break
        except pymongo.errors.AutoReconnect as ex:
            LOG.exception(ex)

    if doc is None:
        if window is None:
            # NOTE(kgriffs): Since we did not filter by a time window,
            # the queue should have been found and updated. Perhaps
            # the queue has been deleted?
            msgtmpl = _(u'Failed to increment the message '
                        u'counter for queue %(name)s and '
                        u'project %(project)s')

            LOG.warning(msgtmpl, dict(name=name, project=project))

            raise exceptions.QueueDoesNotExist(name, project)

        # NOTE(kgriffs): Assume the queue existed, but the counter
        # was recently updated, causing the range query on 'c.t' to
        # exclude the record.
        return None

    return doc['c']['v']
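
# NOTE: A minimal sketch (not part of the driver) of the 'window'
# guard implemented by _inc_counter above: the increment is applied
# only when the counter's last update is older than the window, which
# is how a stalled counter is distinguished from one that a healthy
# producer just bumped. The 'counter' dict is a hypothetical stand-in
# for the 'c' subdocument ({'v': value, 't': last-update timestamp}).

import time


def _example_inc_counter(counter, amount=1, window=None):
    now = int(time.time())

    if window is not None and counter['t'] >= now - window:
        # Updated too recently; assume a competing producer is still
        # alive and let it finish incrementing the counter itself.
        return None

    counter['v'] += amount
    counter['t'] = now
    return counter['v']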
def set_metadata(self, name, metadata, project=None):
    rst = self._collection.update(_get_scoped_query(name, project),
                                  {'$set': {'m': metadata}},
                                  multi=False,
                                  manipulate=False)

    if not rst['updatedExisting']:
        raise exceptions.QueueDoesNotExist(name, project)
def get_metadata(self, name, project):
    if project is None:
        project = ''

    try:
        return self.driver.get('''
            select metadata from Queues
             where project = ? and name = ?''',
                               project, name)[0]

    except utils.NoResult:
        raise exceptions.QueueDoesNotExist(name, project)
def set_metadata(self, name, metadata, project):
    if project is None:
        project = ''

    self.driver.run('''
        update Queues
           set metadata = ?
         where project = ? and name = ?
    ''', self.driver.pack(metadata), project, name)

    if not self.driver.affected:
        raise exceptions.QueueDoesNotExist(name, project)
def post(self, queue_name, messages, client_uuid, project=None):
    if not self._queue_ctrl.exists(queue_name, project):
        raise exceptions.QueueDoesNotExist(queue_name, project)

    now = timeutils.utcnow_ts()
    now_dt = datetime.datetime.utcfromtimestamp(now)
    collection = self._collection(queue_name, project)

    # Set the next basis marker for the first attempt.
    next_marker = self._queue_ctrl._get_counter(queue_name, project)

    prepared_messages = [
        {
            't': message['ttl'],
            'p_q': utils.scope_queue_name(queue_name, project),
            'e': now_dt + datetime.timedelta(seconds=message['ttl']),
            'u': client_uuid,
            'c': {'id': None, 'e': now},
            'b': message['body'] if 'body' in message else {},
            'k': next_marker + index,
        }

        for index, message in enumerate(messages)
    ]

    # Use a retry range for sanity, although we expect
    # to rarely, if ever, reach the maximum number of
    # retries.
    #
    # NOTE(kgriffs): With the default configuration (100 ms
    # max sleep, 1000 max attempts), the max stall time
    # before the operation is abandoned is 49.95 seconds.
    for attempt in self._retry_range:
        try:
            ids = collection.insert(prepared_messages)

            # Log a message if we retried, for debugging perf issues
            if attempt != 0:
                msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                            u'%(num_messages)d messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl,
                          dict(queue=queue_name,
                               attempts=attempt + 1,
                               num_messages=len(ids),
                               project=project))

            # Update the counter in preparation for the next batch
            #
            # NOTE(kgriffs): Due to the unique index on the messages
            # collection, competing inserts will fail as a whole,
            # and keep retrying until the counter is incremented
            # such that the competing markers will start at a
            # unique number, 1 past the max of the messages just
            # inserted above.
            self._queue_ctrl._inc_counter(queue_name,
                                          project,
                                          amount=len(ids))

            return map(str, ids)

        except pymongo.errors.DuplicateKeyError as ex:
            # Try again with the remaining messages

            # TODO(kgriffs): Record stats of how often retries happen,
            # and how many attempts, on average, are required to insert
            # messages.

            # NOTE(kgriffs): This can be used in conjunction with the
            # log line, above, that is emitted after all messages have
            # been posted, to gauge how long it is taking for messages
            # to be posted to a given queue, or overall.
            #
            # TODO(kgriffs): Add transaction ID to help match up
            # loglines
            if attempt == 0:
                msgtmpl = _(u'First attempt failed while '
                            u'adding messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl,
                          dict(queue=queue_name, project=project))

            # NOTE(kgriffs): Never retry past the point that competing
            # messages expire and are GC'd, since once they are gone,
            # the unique index no longer protects us from getting out
            # of order, which could cause an observer to miss this
            # message. The code below provides a sanity-check to
            # ensure this situation cannot happen.
            elapsed = timeutils.utcnow_ts() - now
            if elapsed > MAX_RETRY_POST_DURATION:
                msgtmpl = _(u'Exceeded maximum retry duration for '
                            u'queue "%(queue)s" under project '
                            u'%(project)s')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name, project=project))
                break

            # Chill out for a moment to mitigate thrashing/thundering
            self._backoff_sleep(attempt)

            # NOTE(kgriffs): Perhaps we failed because a worker
            # crashed after inserting messages, but before
            # incrementing the counter; that would cause all future
            # requests to stall, since they would keep getting the
            # same base marker that is conflicting with existing
            # messages, until the messages that "won" expire, at
            # which time we would end up reusing markers, and that
            # could make some messages invisible to an observer that
            # is querying with a marker that is larger than the ones
            # being reused.
            #
            # To mitigate this, we apply a heuristic to determine
            # whether a counter has stalled. We attempt to increment
            # the counter, but only if it hasn't been updated for a
            # few seconds, which should mean that nobody is left to
            # update it!
            #
            # Note that we increment one at a time until the logjam
            # is broken, since we don't know how many messages were
            # posted by the worker before it crashed.
            next_marker = self._queue_ctrl._inc_counter(
                queue_name, project, window=COUNTER_STALL_WINDOW)

            # Retry the entire batch with a new sequence of markers.
            #
            # NOTE(kgriffs): Due to the unique index, and how
            # MongoDB works with batch requests, we will never
            # end up with a partially-successful update. The first
            # document in the batch will fail to insert, and the
            # remainder of the documents will not be attempted.
            if next_marker is None:
                # NOTE(kgriffs): Usually we will end up here, since
                # it should be rare that a counter becomes stalled.
                next_marker = self._queue_ctrl._get_counter(
                    queue_name, project)
            else:
                msgtmpl = (u'Detected a stalled message counter for '
                           u'queue "%(queue)s" under project '
                           u'%(project)s. The counter was incremented '
                           u'to %(value)d.')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name,
                                 project=project,
                                 value=next_marker))

            for index, message in enumerate(prepared_messages):
                message['k'] = next_marker + index

        except Exception as ex:
            # TODO(kgriffs): Query the DB to get the last marker that
            # made it, and extrapolate from there to figure out what
            # needs to be retried.
            LOG.exception(ex)
            raise

    msgtmpl = _(u'Hit maximum number of attempts (%(max)s) for queue '
                u'"%(queue)s" under project %(project)s')

    LOG.warning(msgtmpl,
                dict(max=self.driver.mongodb_conf.max_attempts,
                     queue=queue_name,
                     project=project))

    succeeded_ids = []
    raise exceptions.MessageConflict(queue_name, project,
                                     succeeded_ids)
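
# NOTE: A sketch (under assumed defaults) of where the "49.95 seconds"
# figure quoted in post() comes from. If _backoff_sleep(attempt) were
# to sleep for (attempt / max_attempts) * max_sleep seconds, a linear
# ramp from 0 up to just under the max, the total stall across all
# retries would be the sum below. The linear-ramp formula is an
# assumption made for illustration; see _backoff_sleep for the actual
# policy.


def _example_max_stall(max_attempts=1000, max_sleep=0.100):
    return sum(attempt * max_sleep / max_attempts
               for attempt in range(max_attempts))


# _example_max_stall() returns 49.95 with the defaults above.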