Esempio n. 1
0
    def _get_counter(self, name, project=None):
        """Retrieves the current message counter value for a given topic.

        This helper is used to generate monotonic pagination
        markers that are saved as part of the message
        document.

        Note 1: Markers are scoped per-topic and so are *not*
            globally unique or globally ordered.

        Note 2: If two or more requests to this method are made
            in parallel, this method will return the same counter
            value. This is done intentionally so that the caller
            can detect a parallel message post, allowing it to
            mitigate race conditions between producer and
            observer clients.

        :param name: Name of the queue to which the counter is scoped
        :param project: Topic's project
        :returns: current message counter as an integer
        """

        doc = self._collection.find_one(_get_scoped_query(name, project),
                                        projection={
                                            'c.v': 1,
                                            '_id': 0
                                        })

        if doc is None:
            raise errors.TopicDoesNotExist(name, project)

        return doc['c']['v']
Esempio n. 2
0
    def _inc_counter(self, name, project=None, amount=1, window=None):
        """Increments the message counter and returns the new value.

        :param name: Name of the topic to which the counter is scoped
        :param project: Topic's project name
        :param amount: (Default 1) Amount by which to increment the counter
        :param window: (Default None) A time window, in seconds, that
            must have elapsed since the counter was last updated, in
            order to increment the counter.

        :returns: Updated message counter value, or None if window
            was specified, and the counter has already been updated
            within the specified time period.

        :raises TopicDoesNotExist: if not found
        """
        now = timeutils.utcnow_ts()

        update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
        query = _get_scoped_query(name, project)
        if window is not None:
            threshold = now - window
            query['c.t'] = {'$lt': threshold}

        while True:
            try:
                doc = self._collection.find_one_and_update(
                    query,
                    update,
                    return_document=ReturnDocument.AFTER,
                    projection={
                        'c.v': 1,
                        '_id': 0
                    })

                break
            except pymongo.errors.AutoReconnect:
                LOG.exception('Auto reconnect failure')

        if doc is None:
            if window is None:
                # NOTE(kgriffs): Since we did not filter by a time window,
                # the topic should have been found and updated. Perhaps
                # the topic has been deleted?
                message = _(u'Failed to increment the message '
                            u'counter for topic %(name)s and '
                            u'project %(project)s')
                message %= dict(name=name, project=project)

                LOG.warning(message)

                raise errors.TopicDoesNotExist(name, project)

            # NOTE(kgriffs): Assume the topic existed, but the counter
            # was recently updated, causing the range topic on 'c.t' to
            # exclude the record.
            return None

        return doc['c']['v']
Esempio n. 3
0
    def set_metadata(self, name, metadata, project=None):
        rst = self._collection.update_one(_get_scoped_query(name, project),
                                          {'$set': {
                                              'm': metadata
                                          }})

        if rst.matched_count == 0:
            raise errors.TopicDoesNotExist(name, project)
Esempio n. 4
0
    def get_metadata(self, name, project=None):
        queue = self._collection.find_one(_get_scoped_query(name, project),
                                          projection={
                                              'm': 1,
                                              '_id': 0
                                          })
        if queue is None:
            raise errors.TopicDoesNotExist(name, project)

        return queue.get('m', {})
Esempio n. 5
0
    def post(self, topic_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we've a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and we'd end up with some non-active messages.

        if not self._topic_ctrl.exists(topic_name, project):
            raise errors.TopicDoesNotExist(topic_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(topic_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(topic_name, project)

        messages = list(messages)
        msgs_n = len(messages)
        next_marker = self._inc_counter(topic_name, project,
                                        amount=msgs_n) - msgs_n

        prepared_messages = []
        for index, message in enumerate(messages):
            msg = {
                PROJ_TOPIC: utils.scope_queue_name(topic_name, project),
                't': message['ttl'],
                'e': now_dt + datetime.timedelta(seconds=message['ttl']),
                'u': client_uuid,
                'd': now + message.get('delay', 0),
                'b': message['body'] if 'body' in message else {},
                'k': next_marker + index,
                'tx': None
            }
            if self.driver.conf.enable_checksum:
                msg['cs'] = s_utils.get_checksum(message.get('body', None))

            prepared_messages.append(msg)

        res = collection.insert_many(prepared_messages,
                                     bypass_document_validation=True)

        return [str(id_) for id_ in res.inserted_ids]
Esempio n. 6
0
    def stats(self, name, project=None):
        if not self.topic_controller.exists(name, project=project):
            raise errors.TopicDoesNotExist(name, project)

        controller = self.message_controller

        total = controller._count(name, project=project, include_claimed=True)

        message_stats = {
            'total': total,
        }

        try:
            oldest = controller.first(name, project=project, sort=1)
            newest = controller.first(name, project=project, sort=-1)
        except errors.QueueIsEmpty:
            pass
        else:
            now = timeutils.utcnow_ts()
            message_stats['oldest'] = utils.stat_message(oldest, now)
            message_stats['newest'] = utils.stat_message(newest, now)

        return {'messages': message_stats}
Esempio n. 7
0
    def post(self, topic_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we've a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and we'd end up with some non-active messages.

        if not self._topic_ctrl.exists(topic_name, project):
            raise errors.TopicDoesNotExist(topic_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(topic_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(topic_name, project)

        # Set the next basis marker for the first attempt.
        #
        # Note that we don't increment the counter right away because
        # if 2 concurrent posts happen and the one with the higher counter
        # ends before the one with the lower counter, there's a window
        # where a client paging through the queue may get the messages
        # with the higher counter and skip the previous ones. This would
        # make our FIFO guarantee unsound.
        next_marker = self._get_counter(topic_name, project)

        # Unique transaction ID to facilitate atomic batch inserts
        transaction = objectid.ObjectId()

        prepared_messages = []
        for index, message in enumerate(messages):
            msg = {
                PROJ_TOPIC: utils.scope_queue_name(topic_name, project),
                't': message['ttl'],
                'e': now_dt + datetime.timedelta(seconds=message['ttl']),
                'u': client_uuid,
                'd': now + message.get('delay', 0),
                'b': message['body'] if 'body' in message else {},
                'k': next_marker + index,
                'tx': None
            }
            if self.driver.conf.enable_checksum:
                msg['cs'] = s_utils.get_checksum(message.get('body', None))

            prepared_messages.append(msg)

        # NOTE(kgriffs): Don't take the time to do a 2-phase insert
        # if there is no way for it to partially succeed.
        if len(prepared_messages) == 1:
            transaction = None
            prepared_messages[0]['tx'] = None

        # Use a retry range for sanity, although we expect
        # to rarely, if ever, reach the maximum number of
        # retries.
        #
        # NOTE(kgriffs): With the default configuration (100 ms
        # max sleep, 1000 max attempts), the max stall time
        # before the operation is abandoned is 49.95 seconds.
        for attempt in self._retry_range:
            try:
                res = collection.insert_many(prepared_messages,
                                             bypass_document_validation=True)

                # Log a message if we retried, for debugging perf issues
                if attempt != 0:
                    msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                                u'%(num_messages)d messages to queue '
                                u'"%(topic)s" under project %(project)s')

                    LOG.debug(
                        msgtmpl,
                        dict(topic=topic_name,
                             attempts=attempt + 1,
                             num_messages=len(res.inserted_ids),
                             project=project))

                # Update the counter in preparation for the next batch
                #
                # NOTE(kgriffs): Due to the unique index on the messages
                # collection, competing inserts will fail as a whole,
                # and keep retrying until the counter is incremented
                # such that the competing marker's will start at a
                # unique number, 1 past the max of the messages just
                # inserted above.
                self._inc_counter(topic_name,
                                  project,
                                  amount=len(res.inserted_ids))

                # NOTE(kgriffs): Finalize the insert once we can say that
                # all the messages made it. This makes bulk inserts
                # atomic, assuming queries filter out any non-finalized
                # messages.
                if transaction is not None:
                    collection.update_many({'tx': transaction},
                                           {'$set': {
                                               'tx': None
                                           }},
                                           upsert=False)

                return [str(id_) for id_ in res.inserted_ids]

            except (pymongo.errors.DuplicateKeyError,
                    pymongo.errors.BulkWriteError) as ex:
                # TODO(kgriffs): Record stats of how often retries happen,
                # and how many attempts, on average, are required to insert
                # messages.

                # NOTE(kgriffs): This can be used in conjunction with the
                # log line, above, that is emitted after all messages have
                # been posted, to gauge how long it is taking for messages
                # to be posted to a given topic, or overall.
                #
                # TODO(kgriffs): Add transaction ID to help match up loglines
                if attempt == 0:
                    msgtmpl = _(u'First attempt failed while '
                                u'adding messages to topic '
                                u'"%(topic)s" under project %(project)s')

                    LOG.debug(msgtmpl, dict(topic=topic_name, project=project))

                # NOTE(kgriffs): Never retry past the point that competing
                # messages expire and are GC'd, since once they are gone,
                # the unique index no longer protects us from getting out
                # of order, which could cause an observer to miss this
                # message. The code below provides a sanity-check to ensure
                # this situation can not happen.
                elapsed = timeutils.utcnow_ts() - now
                if elapsed > MAX_RETRY_POST_DURATION:
                    msgtmpl = (u'Exceeded maximum retry duration for topic '
                               u'"%(topic)s" under project %(project)s')

                    LOG.warning(msgtmpl, dict(topic=topic_name,
                                              project=project))
                    break

                # Chill out for a moment to mitigate thrashing/thundering
                self._backoff_sleep(attempt)

                # NOTE(kgriffs): Perhaps we failed because a worker crashed
                # after inserting messages, but before incrementing the
                # counter; that would cause all future requests to stall,
                # since they would keep getting the same base marker that is
                # conflicting with existing messages, until the messages that
                # "won" expire, at which time we would end up reusing markers,
                # and that could make some messages invisible to an observer
                # that is querying with a marker that is large than the ones
                # being reused.
                #
                # To mitigate this, we apply a heuristic to determine whether
                # a counter has stalled. We attempt to increment the counter,
                # but only if it hasn't been updated for a few seconds, which
                # should mean that nobody is left to update it!
                #
                # Note that we increment one at a time until the logjam is
                # broken, since we don't know how many messages were posted
                # by the worker before it crashed.
                next_marker = self._inc_counter(topic_name,
                                                project,
                                                window=COUNTER_STALL_WINDOW)

                # Retry the entire batch with a new sequence of markers.
                #
                # NOTE(kgriffs): Due to the unique index, and how
                # MongoDB works with batch requests, we will never
                # end up with a partially-successful update. The first
                # document in the batch will fail to insert, and the
                # remainder of the documents will not be attempted.
                if next_marker is None:
                    # NOTE(kgriffs): Usually we will end up here, since
                    # it should be rare that a counter becomes stalled.
                    next_marker = self._get_counter(topic_name, project)
                else:
                    msgtmpl = (u'Detected a stalled message counter '
                               u'for topic "%(topic)s" under '
                               u'project %(project)s.'
                               u'The counter was incremented to %(value)d.')

                    LOG.warning(
                        msgtmpl,
                        dict(topic=topic_name,
                             project=project,
                             value=next_marker))

                for index, message in enumerate(prepared_messages):
                    message['k'] = next_marker + index
            except bsonerror.InvalidDocument as ex:
                LOG.exception(ex)
                raise
            except Exception as ex:
                LOG.exception(ex)
                raise

        msgtmpl = (u'Hit maximum number of attempts (%(max)s) for topic '
                   u'"%(topic)s" under project %(project)s')

        LOG.warning(
            msgtmpl,
            dict(max=self.driver.mongodb_conf.max_attempts,
                 topic=topic_name,
                 project=project))

        raise errors.MessageConflict(topic_name, project)
Esempio n. 8
0
    def _inc_counter(self, topic_name, project=None, amount=1, window=None):
        """Increments the message counter and returns the new value.

        :param topic_name: Name of the topic to which the counter is scoped
        :param project: Queue's project name
        :param amount: (Default 1) Amount by which to increment the counter
        :param window: (Default None) A time window, in seconds, that
            must have elapsed since the counter was last updated, in
            order to increment the counter.

        :returns: Updated message counter value, or None if window
            was specified, and the counter has already been updated
            within the specified time period.

        :raises QueueDoesNotExist: if not found
        """

        # NOTE(flaper87): If this `if` is True, it means we're
        # using a mongodb in the control plane. To avoid breaking
        # environments doing so already, we'll keep using the counter
        # in the mongodb topic_controller rather than the one in the
        # message_controller. This should go away, eventually
        if hasattr(self._topic_ctrl, '_inc_counter'):
            return self._topic_ctrl._inc_counter(topic_name, project, amount,
                                                 window)

        now = timeutils.utcnow_ts()

        update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
        query = _get_scoped_query(topic_name, project)
        if window is not None:
            threshold = now - window
            query['c.t'] = {'$lt': threshold}

        while True:
            try:
                collection = self._collection(topic_name, project).stats
                doc = collection.find_one_and_update(
                    query,
                    update,
                    return_document=pymongo.ReturnDocument.AFTER,
                    projection={
                        'c.v': 1,
                        '_id': 0
                    })

                break
            except pymongo.errors.AutoReconnect as ex:
                LOG.exception(ex)

        if doc is None:
            if window is None:
                # NOTE(kgriffs): Since we did not filter by a time window,
                # the topic should have been found and updated. Perhaps
                # the topic has been deleted?
                message = (u'Failed to increment the message '
                           u'counter for topic %(name)s and '
                           u'project %(project)s')
                message %= dict(name=topic_name, project=project)

                LOG.warning(message)

                raise errors.TopicDoesNotExist(topic_name, project)

            # NOTE(kgriffs): Assume the queue existed, but the counter
            # was recently updated, causing the range query on 'c.t' to
            # exclude the record.
            return None

        return doc['c']['v']