Example #1
    def post(self, queue, messages, client_uuid, project=None):
        msgset_key = utils.msgset_key(queue, project)
        counter_key = utils.scope_queue_index(queue, project,
                                              MESSAGE_RANK_COUNTER_SUFFIX)

        message_ids = []
        now = timeutils.utcnow_ts()
        with self._client.pipeline() as pipe:
            for msg in messages:
                prepared_msg = Message(
                    ttl=msg['ttl'],
                    created=now,
                    client_uuid=client_uuid,
                    claim_id=None,
                    claim_expires=now,
                    claim_count=0,
                    delay_expires=now + msg.get('delay', 0),
                    body=msg.get('body', {}),
                    checksum=(s_utils.get_checksum(msg.get('body', None))
                              if self.driver.conf.enable_checksum
                              else None)
                )

                prepared_msg.to_redis(pipe)
                message_ids.append(prepared_msg.id)

            pipe.execute()

        # NOTE(kgriffs): If this call fails, we will return
        # an error to the client and the messages will be
        # orphaned, but Redis will remove them when they
        # expire, so we will just pretend they don't exist
        # in that case.
        self._index_messages(msgset_key, counter_key, message_ids)

        return message_ids
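All of the message bodies above are buffered in a single Redis pipeline and flushed in one round trip; ranking the new IDs happens afterward in `_index_messages`. The sketch below shows one plausible shape for that indexing step, assuming redis-py >= 3.0; the helper and the key names are hypothetical, not the driver's actual code.

import redis


def index_messages(client, msgset_key, counter_key, message_ids):
    # Atomically reserve a contiguous block of ranks for the batch,
    # then derive the first rank in that block.
    n = len(message_ids)
    base_rank = client.incrby(counter_key, n) - n

    # Score each message ID with its rank so that consumers can page
    # through the sorted set in FIFO order.
    client.zadd(msgset_key, {mid: base_rank + i
                             for i, mid in enumerate(message_ids)})


client = redis.Redis()  # assumes a local Redis server
index_messages(client, 'q.msgset', 'q.rank_counter', ['id-1', 'id-2'])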
Example #2
    def _create_msg(self, queue, msg, client_uuid, project):
        slug = str(uuid.uuid1())
        now = timeutils.utcnow_ts()
        message = {
            'body': msg.get('body', {}),
            'claim_id': None,
            'ttl': msg['ttl'],
            'claim_count': 0,
            'delay_expires': now + msg.get('delay', 0)
        }

        if self.driver.conf.enable_checksum:
            message['checksum'] = s_utils.get_checksum(msg.get('body', None))

        contents = jsonutils.dumps(message)
        utils._put_or_create_container(
            self._client,
            utils._message_container(queue, project),
            slug,
            contents=contents,
            content_type='application/json',
            headers={
                'x-object-meta-clientid': str(client_uuid),
                'x-delete-after': msg['ttl']})
        return slug
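Note that the Swift driver never has to reap expired messages itself: the `x-delete-after` header tells Swift to delete the object once the TTL elapses. Below is a minimal sketch of the equivalent raw call, assuming an already-authenticated `swiftclient.client.Connection`; the container name and credentials are placeholders.

import json
import uuid


def put_message(conn, container, body, ttl, client_uuid):
    slug = str(uuid.uuid1())
    contents = json.dumps({'body': body, 'claim_id': None,
                           'ttl': ttl, 'claim_count': 0})
    # x-delete-after makes Swift itself expire the object after
    # ttl seconds, so no garbage-collection pass is needed.
    conn.put_object(container, slug,
                    contents=contents,
                    content_type='application/json',
                    headers={'x-object-meta-clientid': str(client_uuid),
                             'x-delete-after': ttl})
    return slug


# Usage (credentials are placeholders):
# conn = swiftclient.client.Connection(authurl='...', user='...', key='...')
# put_message(conn, 'zaqar_messages', {'event': 'hello'}, 300, uuid.uuid4())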
Example #3
    def post(self, queue_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we have a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and end up with some non-active messages.

        if not self._queue_ctrl.exists(queue_name, project):
            raise errors.QueueDoesNotExist(queue_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(queue_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(queue_name, project)

        messages = list(messages)
        msgs_n = len(messages)
        next_marker = self._inc_counter(queue_name, project,
                                        amount=msgs_n) - msgs_n

        prepared_messages = []
        for index, message in enumerate(messages):
            msg = {
                PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
                't': message['ttl'],
                'e': now_dt + datetime.timedelta(seconds=message['ttl']),
                'u': client_uuid,
                'c': {
                    'id': None,
                    'e': now,
                    'c': 0
                },
                'd': now + message.get('delay', 0),
                'b': message.get('body', {}),
                'k': next_marker + index,
                'tx': None
            }
            if self.driver.conf.enable_checksum:
                msg['cs'] = s_utils.get_checksum(message.get('body', None))

            prepared_messages.append(msg)

        res = collection.insert_many(prepared_messages,
                                     bypass_document_validation=True)

        return [str(id_) for id_ in res.inserted_ids]
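The marker arithmetic is the key move here: `self._inc_counter(..., amount=msgs_n) - msgs_n` atomically reserves a contiguous block of `msgs_n` markers and returns its base, so concurrent producers always receive disjoint ranges. A toy in-process illustration, where a lock-guarded counter stands in for the atomic per-queue counter document in MongoDB:

import threading


class Counter(object):
    """In-process stand-in for the per-queue counter document."""

    def __init__(self):
        self._value = 1
        self._lock = threading.Lock()

    def inc(self, amount):
        with self._lock:
            self._value += amount
            return self._value


counter = Counter()


def reserve_markers(batch_size):
    # Reserve a block of markers, then back up to the block's base.
    next_marker = counter.inc(batch_size) - batch_size
    return [next_marker + index for index in range(batch_size)]


print(reserve_markers(3))  # [1, 2, 3]
print(reserve_markers(2))  # [4, 5]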
Example #4
    def post(self, queue_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we have a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and end up with some non-active messages.

        if not self._queue_ctrl.exists(queue_name, project):
            raise errors.QueueDoesNotExist(queue_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(queue_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(queue_name, project)

        # Set the next basis marker for the first attempt.
        #
        # Note that we don't increment the counter right away because
        # if 2 concurrent posts happen and the one with the higher counter
        # ends before the one with the lower counter, there's a window
        # where a client paging through the queue may get the messages
        # with the higher counter and skip the previous ones. This would
        # make our FIFO guarantee unsound.
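        #
        # For example: suppose producer A reserves markers 1-3 up front,
        # then producer B reserves 4-5 and commits first. A reader that
        # pages past marker 5 before A commits would skip messages 1-3
        # for good, since pagination never moves backward.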
        next_marker = self._get_counter(queue_name, project)

        # Unique transaction ID to facilitate atomic batch inserts
        transaction = objectid.ObjectId()

        prepared_messages = []
        for index, message in enumerate(messages):
            msg = {
                PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
                't': message['ttl'],
                'e': now_dt + datetime.timedelta(seconds=message['ttl']),
                'u': client_uuid,
                'c': {'id': None, 'e': now, 'c': 0},
                'd': now + message.get('delay', 0),
                'b': message.get('body', {}),
                'k': next_marker + index,
                'tx': None
            }
            if self.driver.conf.enable_checksum:
                msg['cs'] = s_utils.get_checksum(message.get('body', None))

            prepared_messages.append(msg)

        # NOTE(kgriffs): Don't take the time to do a 2-phase insert
        # if there is no way for it to partially succeed.
        if len(prepared_messages) == 1:
            transaction = None
            prepared_messages[0]['tx'] = None

        # Use a retry range for sanity, although we expect
        # to rarely, if ever, reach the maximum number of
        # retries.
        #
        # NOTE(kgriffs): With the default configuration (100 ms
        # max sleep, 1000 max attempts), the max stall time
        # before the operation is abandoned is 49.95 seconds.
        for attempt in self._retry_range:
            try:
                res = collection.insert_many(prepared_messages,
                                             bypass_document_validation=True)

                # Log a message if we retried, for debugging perf issues
                if attempt != 0:
                    msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                                u'%(num_messages)d messages to queue '
                                u'"%(queue)s" under project %(project)s')

                    LOG.debug(msgtmpl,
                              dict(queue=queue_name,
                                   attempts=attempt + 1,
                                   num_messages=len(res.inserted_ids),
                                   project=project))

                # Update the counter in preparation for the next batch
                #
                # NOTE(kgriffs): Due to the unique index on the messages
                # collection, competing inserts will fail as a whole,
                # and keep retrying until the counter is incremented
                # such that the competing markers will start at a
                # unique number, 1 past the max of the messages just
                # inserted above.
                self._inc_counter(queue_name, project,
                                  amount=len(res.inserted_ids))

                # NOTE(kgriffs): Finalize the insert once we can say that
                # all the messages made it. This makes bulk inserts
                # atomic, assuming queries filter out any non-finalized
                # messages.
                if transaction is not None:
                    collection.update_many({'tx': transaction},
                                           {'$set': {'tx': None}},
                                           upsert=False)

                return [str(id_) for id_ in res.inserted_ids]

            except (pymongo.errors.DuplicateKeyError,
                    pymongo.errors.BulkWriteError):
                # TODO(kgriffs): Record stats of how often retries happen,
                # and how many attempts, on average, are required to insert
                # messages.

                # NOTE(kgriffs): This can be used in conjunction with the
                # log line, above, that is emitted after all messages have
                # been posted, to gauge how long it is taking for messages
                # to be posted to a given queue, or overall.
                #
                # TODO(kgriffs): Add transaction ID to help match up loglines
                if attempt == 0:
                    msgtmpl = _(u'First attempt failed while '
                                u'adding messages to queue '
                                u'"%(queue)s" under project %(project)s')

                    LOG.debug(msgtmpl, dict(queue=queue_name, project=project))

                # NOTE(kgriffs): Never retry past the point that competing
                # messages expire and are GC'd, since once they are gone,
                # the unique index no longer protects us from getting out
                # of order, which could cause an observer to miss this
                # message. The code below provides a sanity-check to ensure
                # this situation cannot happen.
                elapsed = timeutils.utcnow_ts() - now
                if elapsed > MAX_RETRY_POST_DURATION:
                    msgtmpl = (u'Exceeded maximum retry duration for queue '
                               u'"%(queue)s" under project %(project)s')

                    LOG.warning(msgtmpl,
                                dict(queue=queue_name, project=project))
                    break

                # Chill out for a moment to mitigate thrashing/thundering
                self._backoff_sleep(attempt)

                # NOTE(kgriffs): Perhaps we failed because a worker crashed
                # after inserting messages, but before incrementing the
                # counter; that would cause all future requests to stall,
                # since they would keep getting the same base marker that is
                # conflicting with existing messages, until the messages that
                # "won" expire, at which time we would end up reusing markers,
                # and that could make some messages invisible to an observer
                # that is querying with a marker that is larger than the ones
                # being reused.
                #
                # To mitigate this, we apply a heuristic to determine whether
                # a counter has stalled. We attempt to increment the counter,
                # but only if it hasn't been updated for a few seconds, which
                # should mean that nobody is left to update it!
                #
                # Note that we increment one at a time until the logjam is
                # broken, since we don't know how many messages were posted
                # by the worker before it crashed.
                next_marker = self._inc_counter(
                    queue_name, project, window=COUNTER_STALL_WINDOW)

                # Retry the entire batch with a new sequence of markers.
                #
                # NOTE(kgriffs): Due to the unique index, and how
                # MongoDB works with batch requests, we will never
                # end up with a partially-successful update. The first
                # document in the batch will fail to insert, and the
                # remainder of the documents will not be attempted.
                if next_marker is None:
                    # NOTE(kgriffs): Usually we will end up here, since
                    # it should be rare that a counter becomes stalled.
                    next_marker = self._get_counter(
                        queue_name, project)
                else:
                    msgtmpl = (u'Detected a stalled message counter '
                               u'for queue "%(queue)s" under '
                               u'project %(project)s. '
                               u'The counter was incremented to %(value)d.')

                    LOG.warning(msgtmpl,
                                dict(queue=queue_name,
                                     project=project,
                                     value=next_marker))

                for index, message in enumerate(prepared_messages):
                    message['k'] = next_marker + index
            except bsonerror.InvalidDocument as ex:
                LOG.exception(ex)
                raise
            except Exception as ex:
                LOG.exception(ex)
                raise

        msgtmpl = (u'Hit maximum number of attempts (%(max)s) for queue '
                   u'"%(queue)s" under project %(project)s')

        LOG.warning(msgtmpl,
                    dict(max=self.driver.mongodb_conf.max_attempts,
                         queue=queue_name,
                         project=project))

        raise errors.MessageConflict(queue_name, project)
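The NOTE about a 49.95-second maximum stall pins down the backoff shape: a linear ramp from zero to the maximum sleep over the maximum number of attempts sums to exactly that figure (0.100 s / 1000 x (0 + 1 + ... + 999) = 49.95 s). Here is a sketch of what `_backoff_sleep` could look like under that assumption; the driver's real implementation may differ.

import time


def backoff_sleep(attempt, max_attempts=1000, max_sleep=0.100):
    # Linear ramp: attempt 0 does not sleep at all; later attempts
    # sleep progressively longer, approaching max_sleep at the end.
    time.sleep(max_sleep * attempt / max_attempts)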
Example #5
    def post(self, topic_name, messages, client_uuid, project=None):
        # NOTE(flaper87): This method should be safe to retry on
        # autoreconnect, since we have a 2-step insert for messages.
        # The worst-case scenario is that we'll increase the counter
        # several times and end up with some non-active messages.

        if not self._topic_ctrl.exists(topic_name, project):
            raise errors.TopicDoesNotExist(topic_name, project)

        # NOTE(flaper87): Make sure the counter exists. This method
        # is an upsert.
        self._get_counter(topic_name, project)
        now = timeutils.utcnow_ts()
        now_dt = datetime.datetime.utcfromtimestamp(now)
        collection = self._collection(topic_name, project)

        # Set the next basis marker for the first attempt.
        #
        # Note that we don't increment the counter right away because
        # if 2 concurrent posts happen and the one with the higher counter
        # ends before the one with the lower counter, there's a window
        # where a client paging through the topic may get the messages
        # with the higher counter and skip the previous ones. This would
        # make our FIFO guarantee unsound.
        next_marker = self._get_counter(topic_name, project)

        # Unique transaction ID to facilitate atomic batch inserts
        transaction = objectid.ObjectId()

        prepared_messages = []
        for index, message in enumerate(messages):
            msg = {
                PROJ_TOPIC: utils.scope_queue_name(topic_name, project),
                't': message['ttl'],
                'e': now_dt + datetime.timedelta(seconds=message['ttl']),
                'u': client_uuid,
                'd': now + message.get('delay', 0),
                'b': message.get('body', {}),
                'k': next_marker + index,
                'tx': None
            }
            if self.driver.conf.enable_checksum:
                msg['cs'] = s_utils.get_checksum(message.get('body', None))

            prepared_messages.append(msg)

        # NOTE(kgriffs): Don't take the time to do a 2-phase insert
        # if there is no way for it to partially succeed.
        if len(prepared_messages) == 1:
            transaction = None
            prepared_messages[0]['tx'] = None

        # Use a retry range for sanity, although we expect
        # to rarely, if ever, reach the maximum number of
        # retries.
        #
        # NOTE(kgriffs): With the default configuration (100 ms
        # max sleep, 1000 max attempts), the max stall time
        # before the operation is abandoned is 49.95 seconds.
        for attempt in self._retry_range:
            try:
                res = collection.insert_many(prepared_messages,
                                             bypass_document_validation=True)

                # Log a message if we retried, for debugging perf issues
                if attempt != 0:
                    msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                                u'%(num_messages)d messages to topic '
                                u'"%(topic)s" under project %(project)s')

                    LOG.debug(
                        msgtmpl,
                        dict(topic=topic_name,
                             attempts=attempt + 1,
                             num_messages=len(res.inserted_ids),
                             project=project))

                # Update the counter in preparation for the next batch
                #
                # NOTE(kgriffs): Due to the unique index on the messages
                # collection, competing inserts will fail as a whole,
                # and keep retrying until the counter is incremented
                # such that the competing markers will start at a
                # unique number, 1 past the max of the messages just
                # inserted above.
                self._inc_counter(topic_name,
                                  project,
                                  amount=len(res.inserted_ids))

                # NOTE(kgriffs): Finalize the insert once we can say that
                # all the messages made it. This makes bulk inserts
                # atomic, assuming queries filter out any non-finalized
                # messages.
                if transaction is not None:
                    collection.update_many({'tx': transaction},
                                           {'$set': {
                                               'tx': None
                                           }},
                                           upsert=False)

                return [str(id_) for id_ in res.inserted_ids]

            except (pymongo.errors.DuplicateKeyError,
                    pymongo.errors.BulkWriteError):
                # TODO(kgriffs): Record stats of how often retries happen,
                # and how many attempts, on average, are required to insert
                # messages.

                # NOTE(kgriffs): This can be used in conjunction with the
                # log line, above, that is emitted after all messages have
                # been posted, to gauge how long it is taking for messages
                # to be posted to a given topic, or overall.
                #
                # TODO(kgriffs): Add transaction ID to help match up loglines
                if attempt == 0:
                    msgtmpl = _(u'First attempt failed while '
                                u'adding messages to topic '
                                u'"%(topic)s" under project %(project)s')

                    LOG.debug(msgtmpl, dict(topic=topic_name, project=project))

                # NOTE(kgriffs): Never retry past the point that competing
                # messages expire and are GC'd, since once they are gone,
                # the unique index no longer protects us from getting out
                # of order, which could cause an observer to miss this
                # message. The code below provides a sanity-check to ensure
                # this situation cannot happen.
                elapsed = timeutils.utcnow_ts() - now
                if elapsed > MAX_RETRY_POST_DURATION:
                    msgtmpl = (u'Exceeded maximum retry duration for topic '
                               u'"%(topic)s" under project %(project)s')

                    LOG.warning(msgtmpl, dict(topic=topic_name,
                                              project=project))
                    break

                # Chill out for a moment to mitigate thrashing/thundering
                self._backoff_sleep(attempt)

                # NOTE(kgriffs): Perhaps we failed because a worker crashed
                # after inserting messages, but before incrementing the
                # counter; that would cause all future requests to stall,
                # since they would keep getting the same base marker that is
                # conflicting with existing messages, until the messages that
                # "won" expire, at which time we would end up reusing markers,
                # and that could make some messages invisible to an observer
                # that is querying with a marker that is larger than the ones
                # being reused.
                #
                # To mitigate this, we apply a heuristic to determine whether
                # a counter has stalled. We attempt to increment the counter,
                # but only if it hasn't been updated for a few seconds, which
                # should mean that nobody is left to update it!
                #
                # Note that we increment one at a time until the logjam is
                # broken, since we don't know how many messages were posted
                # by the worker before it crashed.
                next_marker = self._inc_counter(topic_name,
                                                project,
                                                window=COUNTER_STALL_WINDOW)

                # Retry the entire batch with a new sequence of markers.
                #
                # NOTE(kgriffs): Due to the unique index, and how
                # MongoDB works with batch requests, we will never
                # end up with a partially-successful update. The first
                # document in the batch will fail to insert, and the
                # remainder of the documents will not be attempted.
                if next_marker is None:
                    # NOTE(kgriffs): Usually we will end up here, since
                    # it should be rare that a counter becomes stalled.
                    next_marker = self._get_counter(topic_name, project)
                else:
                    msgtmpl = (u'Detected a stalled message counter '
                               u'for topic "%(topic)s" under '
                               u'project %(project)s. '
                               u'The counter was incremented to %(value)d.')

                    LOG.warning(
                        msgtmpl,
                        dict(topic=topic_name,
                             project=project,
                             value=next_marker))

                for index, message in enumerate(prepared_messages):
                    message['k'] = next_marker + index
            except bsonerror.InvalidDocument as ex:
                LOG.exception(ex)
                raise
            except Exception as ex:
                LOG.exception(ex)
                raise

        msgtmpl = (u'Hit maximum number of attempts (%(max)s) for topic '
                   u'"%(topic)s" under project %(project)s')

        LOG.warning(
            msgtmpl,
            dict(max=self.driver.mongodb_conf.max_attempts,
                 topic=topic_name,
                 project=project))

        raise errors.MessageConflict(topic_name, project)
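The topic variant mirrors the queue controller almost line for line; the core trick in both is the two-phase insert. Below is a condensed, self-contained sketch of that pattern with pymongo, assuming a local MongoDB; the database and collection names are placeholders.

from bson import objectid
from pymongo import MongoClient

collection = MongoClient().example_db.messages  # hypothetical names
collection.drop()  # start clean for the demo

transaction = objectid.ObjectId()
docs = [{'k': k, 'b': {'n': k}, 'tx': transaction} for k in range(3)]

# Phase 1: stage the batch. Readers filter on {'tx': None}, so none
# of these documents is visible yet.
collection.insert_many(docs)
assert collection.count_documents({'tx': None}) == 0

# Phase 2: publish the whole batch by clearing the shared marker.
collection.update_many({'tx': transaction}, {'$set': {'tx': None}})
assert collection.count_documents({'tx': None}) == 3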