def _get_counter(self, name, project=None):
    """Retrieves the current message counter value for a given queue.

    This helper is used to generate monotonic pagination
    markers that are saved as part of the message
    document.

    Note 1: Markers are scoped per-queue and so are *not*
        globally unique or globally ordered.

    Note 2: If two or more requests to this method are made
        in parallel, this method will return the same counter
        value. This is done intentionally so that the caller
        can detect a parallel message post, allowing it to
        mitigate race conditions between producer and
        observer clients.

    :param name: Name of the queue to which the counter is scoped
    :param project: Queue's project
    :returns: current message counter as an integer
    """

    doc = self._collection.find_one(_get_scoped_query(name, project),
                                    fields={'c.v': 1, '_id': 0})

    if doc is None:
        raise errors.QueueDoesNotExist(name, project)

    return doc['c']['v']
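# NOTE (illustrative sketch, not part of the driver): the counter semantics
# described in the docstring above, modeled with a plain in-memory dict
# standing in for the queue document's 'c' sub-document
# ({'v': marker value, 't': last-updated timestamp}). The class and method
# names below are hypothetical.
import time


class FakeQueueCounter(object):

    def __init__(self):
        self._c = {'v': 1, 't': time.time()}

    def get_counter(self):
        # Like _get_counter: a plain read, so two parallel posters can
        # observe the same value and thereby detect a competing post.
        return self._c['v']

    def inc_counter(self, amount=1, window=None):
        now = time.time()
        if window is not None and self._c['t'] >= now - window:
            # Updated too recently; caller treats the counter as healthy.
            return None
        self._c['v'] += amount
        self._c['t'] = now
        return self._c['v']


counter = FakeQueueCounter()
base = counter.get_counter()             # e.g. 1
counter.inc_counter(amount=3)            # reserve room for 3 messages
markers = [base + i for i in range(3)]   # [1, 2, 3]: per-queue, monotonic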
def stats(self, name, project=None):
    if not self.exists(name, project=project):
        raise errors.QueueDoesNotExist(name, project)

    controller = self.driver.message_controller

    active = controller._count(name, project=project,
                               include_claimed=False)
    total = controller._count(name, project=project,
                              include_claimed=True)

    message_stats = {
        'claimed': total - active,
        'free': active,
        'total': total,
    }

    try:
        oldest = controller.first(name, project=project, sort=1)
        newest = controller.first(name, project=project, sort=-1)
    except errors.QueueIsEmpty:
        pass
    else:
        now = timeutils.utcnow_ts()
        message_stats['oldest'] = utils.stat_message(oldest, now)
        message_stats['newest'] = utils.stat_message(newest, now)

    return {'messages': message_stats}
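# NOTE (illustrative only): shape of the document returned by stats() for a
# queue holding three messages, one of which is claimed. The 'oldest' and
# 'newest' values are whatever utils.stat_message() produces; they are left
# empty here.
example_stats = {
    'messages': {
        'claimed': 1,    # total - active
        'free': 2,       # active: unclaimed, unexpired
        'total': 3,
        'oldest': {},    # utils.stat_message(oldest, now), omitted here
        'newest': {},    # utils.stat_message(newest, now), omitted here
    }
}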
def _get(self, name, project=None, fields={'m': 1, '_id': 0}):
    queue = self._collection.find_one(_get_scoped_query(name, project),
                                      fields=fields)
    if queue is None:
        raise errors.QueueDoesNotExist(name, project)

    return queue
def create(self, queue, subscriber, ttl, options, project=None):
    source = queue
    now = timeutils.utcnow_ts()
    ttl = int(ttl)
    expires = now + ttl

    source_query = {'p_q': utils.scope_queue_name(source, project)}
    target_source = self._queue_collection.find_one(
        source_query, fields={'m': 1, '_id': 0})
    if target_source is None:
        raise errors.QueueDoesNotExist(source, project)

    try:
        subscription_id = self._collection.insert({'s': source,
                                                   'u': subscriber,
                                                   't': ttl,
                                                   'e': expires,
                                                   'o': options,
                                                   'p': project})
        return subscription_id
    except pymongo.errors.DuplicateKeyError:
        return None
def _inc_counter(self, name, project=None, amount=1, window=None):
    """Increments the message counter and returns the new value.

    :param name: Name of the queue to which the counter is scoped
    :param project: Queue's project name
    :param amount: (Default 1) Amount by which to increment the
        counter
    :param window: (Default None) A time window, in seconds, that
        must have elapsed since the counter was last updated, in
        order to increment the counter.

    :returns: Updated message counter value, or None if window
        was specified, and the counter has already been updated
        within the specified time period.

    :raises QueueDoesNotExist: if not found
    """
    now = timeutils.utcnow_ts()

    update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
    query = _get_scoped_query(name, project)
    if window is not None:
        threshold = now - window
        query['c.t'] = {'$lt': threshold}

    while True:
        try:
            doc = self._collection.find_one_and_update(
                query, update,
                return_document=ReturnDocument.AFTER,
                projection={'c.v': 1, '_id': 0})

            break
        except pymongo.errors.AutoReconnect as ex:
            LOG.exception(ex)

    if doc is None:
        if window is None:
            # NOTE(kgriffs): Since we did not filter by a time window,
            # the queue should have been found and updated. Perhaps
            # the queue has been deleted?
            message = _(u'Failed to increment the message '
                        u'counter for queue %(name)s and '
                        u'project %(project)s')
            message %= dict(name=name, project=project)

            LOG.warning(message)

            raise errors.QueueDoesNotExist(name, project)

        # NOTE(kgriffs): Assume the queue existed, but the counter
        # was recently updated, causing the range query on 'c.t' to
        # exclude the record.
        return None

    return doc['c']['v']
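# NOTE (illustrative sketch): roughly the filter/update documents that
# _inc_counter() sends to MongoDB when called with window=10. The 'p_q' key
# follows the scoped query used by the subscription controller below; the
# scoped name and timestamp values are made up for the example.
now = 1700000000                          # stand-in for timeutils.utcnow_ts()
window = 10

query = {
    'p_q': 'my-project/my-queue',         # _get_scoped_query(name, project)
    'c.t': {'$lt': now - window},         # only match a "stalled" counter
}
update = {
    '$inc': {'c.v': 1},                   # bump the marker value
    '$set': {'c.t': now},                 # record when it was bumped
}
# find_one_and_update(query, update, return_document=AFTER) yields None
# whenever another writer touched 'c.t' within the window, telling the
# caller to back off rather than double-increment.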
def set_metadata(self, name, metadata, project=None):
    rst = self._collection.update(_get_scoped_query(name, project),
                                  {'$set': {'m': metadata}},
                                  multi=False,
                                  manipulate=False)

    if not rst['updatedExisting']:
        raise errors.QueueDoesNotExist(name, project)
def set_metadata(self, name, metadata, project=None):
    rst = self._collection.update_one(_get_scoped_query(name, project),
                                      {'$set': {'m': metadata}})

    if rst.matched_count == 0:
        raise errors.QueueDoesNotExist(name, project)
def get_metadata(self, name, project=None):
    queue = self._collection.find_one(_get_scoped_query(name, project),
                                      projection={'m': 1, '_id': 0})
    if queue is None:
        raise errors.QueueDoesNotExist(name, project)

    return queue.get('m', {})
def post(self, queue_name, messages, client_uuid, project=None):
    # NOTE(flaper87): This method should be safe to retry on
    # autoreconnect, since we've a 2-step insert for messages.
    # The worst-case scenario is that we'll increase the counter
    # several times and we'd end up with some non-active messages.

    if not self._queue_ctrl.exists(queue_name, project):
        raise errors.QueueDoesNotExist(queue_name, project)

    # NOTE(flaper87): Make sure the counter exists. This method
    # is an upsert.
    self._get_counter(queue_name, project)
    now = timeutils.utcnow_ts()
    now_dt = datetime.datetime.utcfromtimestamp(now)
    collection = self._collection(queue_name, project)

    messages = list(messages)
    msgs_n = len(messages)
    next_marker = self._inc_counter(queue_name,
                                    project,
                                    amount=msgs_n) - msgs_n

    prepared_messages = [{
        PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
        't': message['ttl'],
        'e': now_dt + datetime.timedelta(seconds=message['ttl']),
        'u': client_uuid,
        'c': {'id': None, 'e': now, 'c': 0},
        'd': now + message.get('delay', 0),
        'b': message['body'] if 'body' in message else {},
        'k': next_marker + index,
        'tx': None,
    } for index, message in enumerate(messages)]

    res = collection.insert_many(prepared_messages,
                                 bypass_document_validation=True)

    return [str(id_) for id_ in res.inserted_ids]
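# NOTE (worked example, values hypothetical): the marker arithmetic above
# reserves a contiguous block of 'k' values with a single atomic $inc.
msgs_n = 3
new_value = 7 + msgs_n                     # _inc_counter(amount=3) returns 10
next_marker = new_value - msgs_n           # 7: first marker of this batch

markers = [next_marker + index for index in range(msgs_n)]
assert markers == [7, 8, 9]                # contiguous and unique per queue
# A concurrent post observes the counter at 10 and is assigned [10, 11, 12],
# so the two batches cannot collide on the unique 'k' index.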
def create(self, queue, subscriber, ttl, options, project=None):
    source = queue
    now = timeutils.utcnow_ts()
    ttl = int(ttl)
    expires = now + ttl

    if not self._queue_ctrl.exists(source, project):
        raise errors.QueueDoesNotExist(source, project)

    try:
        subscription_id = self._collection.insert({'s': source,
                                                   'u': subscriber,
                                                   't': ttl,
                                                   'e': expires,
                                                   'o': options,
                                                   'p': project})
        return subscription_id
    except pymongo.errors.DuplicateKeyError:
        return None
def post(self, queue_name, messages, client_uuid, project=None):
    if not self._queue_ctrl.exists(queue_name, project):
        raise errors.QueueDoesNotExist(queue_name, project)

    now = timeutils.utcnow_ts()
    now_dt = datetime.datetime.utcfromtimestamp(now)
    collection = self._collection(queue_name, project)

    # Set the next basis marker for the first attempt.
    next_marker = self._queue_ctrl._get_counter(queue_name, project)

    # Unique transaction ID to facilitate atomic batch inserts
    transaction = objectid.ObjectId()

    prepared_messages = [{
        PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
        't': message['ttl'],
        'e': now_dt + datetime.timedelta(seconds=message['ttl']),
        'u': client_uuid,
        'c': {'id': None, 'e': now},
        'b': message['body'] if 'body' in message else {},
        'k': next_marker + index,
        'tx': transaction,
    } for index, message in enumerate(messages)]

    # NOTE(kgriffs): Don't take the time to do a 2-phase insert
    # if there is no way for it to partially succeed.
    if len(prepared_messages) == 1:
        transaction = None
        prepared_messages[0]['tx'] = None

    # Use a retry range for sanity, although we expect
    # to rarely, if ever, reach the maximum number of
    # retries.
    #
    # NOTE(kgriffs): With the default configuration (100 ms
    # max sleep, 1000 max attempts), the max stall time
    # before the operation is abandoned is 49.95 seconds.
    for attempt in self._retry_range:
        try:
            ids = collection.insert(prepared_messages)

            # Log a message if we retried, for debugging perf issues
            if attempt != 0:
                msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                            u'%(num_messages)d messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl,
                          dict(queue=queue_name,
                               attempts=attempt + 1,
                               num_messages=len(ids),
                               project=project))

            # Update the counter in preparation for the next batch
            #
            # NOTE(kgriffs): Due to the unique index on the messages
            # collection, competing inserts will fail as a whole,
            # and keep retrying until the counter is incremented
            # such that the competing markers will start at a
            # unique number, 1 past the max of the messages just
            # inserted above.
            self._queue_ctrl._inc_counter(queue_name,
                                          project,
                                          amount=len(ids))

            # NOTE(kgriffs): Finalize the insert once we can say that
            # all the messages made it. This makes bulk inserts
            # atomic, assuming queries filter out any non-finalized
            # messages.
            if transaction is not None:
                collection.update({'tx': transaction},
                                  {'$set': {'tx': None}},
                                  upsert=False,
                                  multi=True)

            return map(str, ids)

        except pymongo.errors.DuplicateKeyError as ex:
            # TODO(kgriffs): Record stats of how often retries happen,
            # and how many attempts, on average, are required to insert
            # messages.

            # NOTE(kgriffs): This can be used in conjunction with the
            # log line, above, that is emitted after all messages have
            # been posted, to gauge how long it is taking for messages
            # to be posted to a given queue, or overall.
            #
            # TODO(kgriffs): Add transaction ID to help match up loglines
            if attempt == 0:
                msgtmpl = _(u'First attempt failed while '
                            u'adding messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl, dict(queue=queue_name, project=project))

            # NOTE(kgriffs): Never retry past the point that competing
            # messages expire and are GC'd, since once they are gone,
            # the unique index no longer protects us from getting out
            # of order, which could cause an observer to miss this
            # message. The code below provides a sanity-check to ensure
            # this situation cannot happen.
            elapsed = timeutils.utcnow_ts() - now
            if elapsed > MAX_RETRY_POST_DURATION:
                msgtmpl = _(u'Exceeded maximum retry duration for queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name, project=project))
                break

            # Chill out for a moment to mitigate thrashing/thundering
            self._backoff_sleep(attempt)

            # NOTE(kgriffs): Perhaps we failed because a worker crashed
            # after inserting messages, but before incrementing the
            # counter; that would cause all future requests to stall,
            # since they would keep getting the same base marker that is
            # conflicting with existing messages, until the messages that
            # "won" expire, at which time we would end up reusing markers,
            # and that could make some messages invisible to an observer
            # that is querying with a marker that is larger than the ones
            # being reused.
            #
            # To mitigate this, we apply a heuristic to determine whether
            # a counter has stalled. We attempt to increment the counter,
            # but only if it hasn't been updated for a few seconds, which
            # should mean that nobody is left to update it!
            #
            # Note that we increment one at a time until the logjam is
            # broken, since we don't know how many messages were posted
            # by the worker before it crashed.
            next_marker = self._queue_ctrl._inc_counter(
                queue_name, project, window=COUNTER_STALL_WINDOW)

            # Retry the entire batch with a new sequence of markers.
            #
            # NOTE(kgriffs): Due to the unique index, and how
            # MongoDB works with batch requests, we will never
            # end up with a partially-successful update. The first
            # document in the batch will fail to insert, and the
            # remainder of the documents will not be attempted.
            if next_marker is None:
                # NOTE(kgriffs): Usually we will end up here, since
                # it should be rare that a counter becomes stalled.
                next_marker = self._queue_ctrl._get_counter(
                    queue_name, project)
            else:
                msgtmpl = (u'Detected a stalled message counter for '
                           u'queue "%(queue)s" under project %(project)s. '
                           u'The counter was incremented to %(value)d.')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name,
                                 project=project,
                                 value=next_marker))

            for index, message in enumerate(prepared_messages):
                message['k'] = next_marker + index

        except Exception as ex:
            LOG.exception(ex)
            raise

    msgtmpl = _(u'Hit maximum number of attempts (%(max)s) for queue '
                u'"%(queue)s" under project %(project)s')

    LOG.warning(msgtmpl,
                dict(max=self.driver.mongodb_conf.max_attempts,
                     queue=queue_name,
                     project=project))

    succeeded_ids = []
    raise errors.MessageConflict(queue_name, project, succeeded_ids)
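# NOTE (illustrative sketch): the two phases of the batch insert above,
# reduced to the filter/update documents involved. The read-side filter
# spells out the "queries filter out any non-finalized messages" assumption
# noted in the code; collection handles and payload values are made up.
from bson import objectid

transaction = objectid.ObjectId()

# Phase 1: every document in the batch carries the same transaction ID.
batch = [{'k': marker, 'tx': transaction, 'b': body}
         for marker, body in [(7, {'n': 1}), (8, {'n': 2}), (9, {'n': 3})]]

# Phase 2: once the whole batch is inserted, flip 'tx' to None in one update.
finalize_filter = {'tx': transaction}
finalize_update = {'$set': {'tx': None}}

# Read side: listing/claiming queries are assumed to include this clause, so
# a half-inserted batch (e.g. a worker that crashed mid-post) stays invisible.
visible_only = {'tx': None}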
def _inc_counter(self, queue_name, project=None, amount=1, window=None):
    """Increments the message counter and returns the new value.

    :param queue_name: Name of the queue to which the counter is scoped
    :param project: Queue's project name
    :param amount: (Default 1) Amount by which to increment the
        counter
    :param window: (Default None) A time window, in seconds, that
        must have elapsed since the counter was last updated, in
        order to increment the counter.

    :returns: Updated message counter value, or None if window
        was specified, and the counter has already been updated
        within the specified time period.

    :raises: storage.errors.QueueDoesNotExist
    """
    # NOTE(flaper87): If this `if` is True, it means we're
    # using a mongodb in the control plane. To avoid breaking
    # environments doing so already, we'll keep using the counter
    # in the mongodb queue_controller rather than the one in the
    # message_controller. This should go away, eventually
    if hasattr(self._queue_ctrl, '_inc_counter'):
        return self._queue_ctrl._inc_counter(queue_name, project,
                                             amount, window)

    now = timeutils.utcnow_ts()

    update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
    query = _get_scoped_query(queue_name, project)
    if window is not None:
        threshold = now - window
        query['c.t'] = {'$lt': threshold}

    while True:
        try:
            collection = self._collection(queue_name, project).stats
            doc = collection.find_one_and_update(
                query, update,
                return_document=pymongo.ReturnDocument.AFTER,
                projection={'c.v': 1, '_id': 0})

            break
        except pymongo.errors.AutoReconnect as ex:
            LOG.exception(ex)

    if doc is None:
        if window is None:
            # NOTE(kgriffs): Since we did not filter by a time window,
            # the queue should have been found and updated. Perhaps
            # the queue has been deleted?
            message = _LW(u'Failed to increment the message '
                          u'counter for queue %(name)s and '
                          u'project %(project)s')
            message %= dict(name=queue_name, project=project)

            LOG.warning(message)

            raise errors.QueueDoesNotExist(queue_name, project)

        # NOTE(kgriffs): Assume the queue existed, but the counter
        # was recently updated, causing the range query on 'c.t' to
        # exclude the record.
        return None

    return doc['c']['v']