def wrapper(self, *args, **kwargs):
    # TODO(kgriffs): Figure out a way to not have to rely on the
    # presence of `mongodb_conf`
    max_attempts = self.driver.mongodb_conf.max_reconnect_attempts
    sleep_sec = self.driver.mongodb_conf.reconnect_sleep

    last_ex = None
    for attempt in range(max_attempts):
        try:
            return func(self, *args, **kwargs)
        except errors.AutoReconnect as ex:
            LOG.warning(_LW(u'Caught AutoReconnect, retrying the '
                            'call to {0}').format(func))

            last_ex = ex

            # Back off exponentially between attempts
            time.sleep(sleep_sec * (2 ** attempt))
    else:
        LOG.error(_LE(u'Caught AutoReconnect, maximum attempts '
                      'to {0} exceeded.').format(func))

        raise last_ex
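
# NOTE: A minimal, self-contained sketch of the same retry pattern as
# the wrapper above, decoupled from `mongodb_conf`. The decorator name
# and its parameters are illustrative assumptions, not Zaqar API; Zaqar
# reads the equivalent values from the driver configuration instead.

import functools
import logging
import time

LOG = logging.getLogger(__name__)


def retry_on(exc_type, max_attempts=10, sleep_sec=0.02):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_ex = None
            for attempt in range(max_attempts):
                try:
                    return func(*args, **kwargs)
                except exc_type as ex:
                    LOG.warning('Caught %s, retrying the call to %s',
                                exc_type.__name__, func)
                    last_ex = ex

                    # Exponential backoff between attempts
                    time.sleep(sleep_sec * (2 ** attempt))

            raise last_ex
        return wrapper
    return decorator
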
def register_api(driver, conf):
    # Compare the bare version string; wrapping it in a list would
    # never match against a list of version strings.
    if (deprecated and
            api_version not in conf.enable_deprecated_api_versions):
        return None

    if deprecated:
        LOG.warning(_LW('Enabling API version %(version)s. '
                        'This version was marked as deprecated in '
                        '%(updated)s. Using it may expose security '
                        'issues, unexpected behavior or damage your '
                        'data.') % {'version': api_version,
                                    'updated': api_updated})

    return fn(driver, conf)
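
# NOTE: Illustration of the membership test in the guard above, with
# an assumed example value for conf.enable_deprecated_api_versions.
# Wrapping the version in a list makes the check always fail, since a
# one-element list is never an element of a list of strings.

enabled = ['1', '1.1']
assert '1.1' in enabled          # correct: compare the bare string
assert ['1.1'] not in enabled    # buggy form: never a member
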
def setup(conf, binary, host):
    if conf.profiler.enabled:
        # Note(wangxiyuan): OSProfiler supports several kinds of
        # backends, such as Ceilometer, ElasticSearch, Messaging and
        # MongoDB.
        # 1. Ceilometer is only used for data collection, and Messaging
        #    is only used for data transfer. So Ceilometer only works
        #    when Messaging is enabled.
        # 2. ElasticSearch and MongoDB support both data collection and
        #    transfer, so they can be used standalone.
        # 3. Which backend is used depends on the config option
        #    "connection_string"; the default value is "messaging://".
        backend_uri = conf.profiler.connection_string
        if "://" not in backend_uri:
            backend_uri += "://"
        parsed_connection = urlparse.urlparse(backend_uri)
        backend_type = parsed_connection.scheme
        if backend_type == "messaging":
            import oslo_messaging
            _notifier = notifier.create(
                backend_uri, oslo_messaging, {},
                oslo_messaging.get_transport(conf), "Zaqar", binary, host)
        else:
            _notifier = notifier.create(backend_uri, project="Zaqar",
                                        service=binary, host=host)
        notifier.set(_notifier)
        LOG.warning(_LW("OSProfiler is enabled.\nThis means that anyone "
                        "who knows any of the hmac_keys specified in "
                        "/etc/zaqar/zaqar.conf can trace their requests.\n"
                        "In real life only an operator can read this "
                        "file, so there is no security issue. Note that "
                        "even if someone can trigger the profiler, only "
                        "an admin user can retrieve trace information.\n"
                        "To disable OSProfiler, set in zaqar.conf:\n"
                        "[profiler]\nenabled=false"))
        web.enable(conf.profiler.hmac_keys)
    else:
        web.disable()
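
# NOTE: A quick, standalone illustration of the connection-string
# normalization above; urllib.parse stands in here for the six-based
# urlparse import used by the module.

from urllib.parse import urlparse

for backend_uri in ('messaging', 'messaging://',
                    'mongodb://localhost:27017'):
    # Mirror setup(): bare backend names get '://' appended
    if '://' not in backend_uri:
        backend_uri += '://'

    print(backend_uri, '->', urlparse(backend_uri).scheme)

# Output:
#   messaging:// -> messaging
#   messaging:// -> messaging
#   mongodb://localhost:27017 -> mongodb
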
def wrapper(self, *args, **kwargs):
    # TODO(prashanthr_): Try to reuse this utility. Violates DRY
    # Can pass config parameters into the decorator and create a
    # storage level utility.
    max_attempts = self.driver.redis_conf.max_reconnect_attempts
    sleep_sec = self.driver.redis_conf.reconnect_sleep

    for attempt in range(max_attempts):
        try:
            return func(self, *args, **kwargs)
        except redis.exceptions.ConnectionError:
            # NOTE(kgriffs): redis-py will retry once itself,
            # but if the command cannot be sent the second time after
            # disconnecting and reconnecting, the error is raised
            # and we will catch it here.
            #
            # NOTE(kgriffs): When using a sentinel, if a master fails
            # the initial retry will gracefully fail over to the
            # new master if the sentinel failover delay is low enough;
            # if the delay is too long, then redis-py will get a
            # MasterNotFoundError (a subclass of ConnectionError) on
            # its retry, which will then just get raised and caught
            # here, in which case we will keep retrying until the
            # sentinel completes the failover and stops raising
            # MasterNotFoundError.

            # NOTE: capture via sys.exc_info() so the exception is
            # still available to the for-else clause below; an
            # `except ... as ex` target is unbound once the handler
            # exits under Python 3.
            ex = sys.exc_info()[1]

            LOG.warning(_LW(u'Caught ConnectionError, retrying the '
                            'call to {0}').format(func))

            time.sleep(sleep_sec * (2 ** attempt))
    else:
        LOG.error(_LE(u'Caught ConnectionError, maximum attempts '
                      'to {0} exceeded.').format(func))

        raise ex
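
# NOTE: For context on the sentinel comments above, a minimal sketch
# of connecting through redis-py's Sentinel support. The endpoint and
# the monitored service name ('mymaster') are assumed example values.

import redis
from redis.sentinel import Sentinel

sentinel = Sentinel([('localhost', 26379)], socket_timeout=0.5)
master = sentinel.master_for('mymaster', socket_timeout=0.5)

try:
    master.set('key', 'value')
except redis.exceptions.ConnectionError:
    # This also covers MasterNotFoundError while a failover is in
    # progress; a loop like the wrapper above would back off and retry.
    pass
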
def post(self, queue_name, messages, client_uuid, project=None):
    # NOTE(flaper87): This method should be safe to retry on
    # autoreconnect, since we have a 2-step insert for messages.
    # The worst-case scenario is that we'll increase the counter
    # several times and we'd end up with some non-active messages.

    if not self._queue_ctrl.exists(queue_name, project):
        raise errors.QueueDoesNotExist(queue_name, project)

    # NOTE(flaper87): Make sure the counter exists. This method
    # is an upsert.
    self._get_counter(queue_name, project)
    now = timeutils.utcnow_ts()
    now_dt = datetime.datetime.utcfromtimestamp(now)
    collection = self._collection(queue_name, project)

    # Set the next basis marker for the first attempt.
    #
    # Note that we don't increment the counter right away because
    # if 2 concurrent posts happen and the one with the higher counter
    # ends before the one with the lower counter, there's a window
    # where a client paging through the queue may get the messages
    # with the higher counter and skip the previous ones. This would
    # make our FIFO guarantee unsound.
    next_marker = self._get_counter(queue_name, project)

    # Unique transaction ID to facilitate atomic batch inserts
    transaction = objectid.ObjectId()

    prepared_messages = [{
        PROJ_QUEUE: utils.scope_queue_name(queue_name, project),
        't': message['ttl'],
        'e': now_dt + datetime.timedelta(seconds=message['ttl']),
        'u': client_uuid,
        'c': {'id': None, 'e': now},
        'b': message['body'] if 'body' in message else {},
        'k': next_marker + index,
        'tx': transaction,
    } for index, message in enumerate(messages)]

    # NOTE(kgriffs): Don't take the time to do a 2-phase insert
    # if there is no way for it to partially succeed.
    if len(prepared_messages) == 1:
        transaction = None
        prepared_messages[0]['tx'] = None

    # Use a retry range for sanity, although we expect
    # to rarely, if ever, reach the maximum number of
    # retries.
    #
    # NOTE(kgriffs): With the default configuration (100 ms
    # max sleep, 1000 max attempts), the max stall time
    # before the operation is abandoned is 49.95 seconds.
    for attempt in self._retry_range:
        try:
            ids = collection.insert(prepared_messages, check_keys=False)

            # Log a message if we retried, for debugging perf issues
            if attempt != 0:
                msgtmpl = _(u'%(attempts)d attempt(s) required to post '
                            u'%(num_messages)d messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl,
                          dict(queue=queue_name,
                               attempts=attempt + 1,
                               num_messages=len(ids),
                               project=project))

            # Update the counter in preparation for the next batch
            #
            # NOTE(kgriffs): Due to the unique index on the messages
            # collection, competing inserts will fail as a whole,
            # and keep retrying until the counter is incremented
            # such that the competing markers will start at a
            # unique number, 1 past the max of the messages just
            # inserted above.
            self._inc_counter(queue_name, project, amount=len(ids))

            # NOTE(kgriffs): Finalize the insert once we can say that
            # all the messages made it. This makes bulk inserts
            # atomic, assuming queries filter out any non-finalized
            # messages.
            if transaction is not None:
                collection.update({'tx': transaction},
                                  {'$set': {'tx': None}},
                                  upsert=False, multi=True)

            return [str(id_) for id_ in ids]

        except pymongo.errors.DuplicateKeyError:
            # TODO(kgriffs): Record stats of how often retries happen,
            # and how many attempts, on average, are required to insert
            # messages.
            # NOTE(kgriffs): This can be used in conjunction with the
            # log line, above, that is emitted after all messages have
            # been posted, to gauge how long it is taking for messages
            # to be posted to a given queue, or overall.
            #
            # TODO(kgriffs): Add transaction ID to help match up loglines
            if attempt == 0:
                msgtmpl = _(u'First attempt failed while '
                            u'adding messages to queue '
                            u'"%(queue)s" under project %(project)s')

                LOG.debug(msgtmpl, dict(queue=queue_name, project=project))

            # NOTE(kgriffs): Never retry past the point that competing
            # messages expire and are GC'd, since once they are gone,
            # the unique index no longer protects us from getting out
            # of order, which could cause an observer to miss this
            # message. The code below provides a sanity-check to ensure
            # this situation can not happen.
            elapsed = timeutils.utcnow_ts() - now

            if elapsed > MAX_RETRY_POST_DURATION:
                msgtmpl = _LW(u'Exceeded maximum retry duration for '
                              u'queue "%(queue)s" under project '
                              u'%(project)s')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name, project=project))
                break

            # Chill out for a moment to mitigate thrashing/thundering
            self._backoff_sleep(attempt)

            # NOTE(kgriffs): Perhaps we failed because a worker crashed
            # after inserting messages, but before incrementing the
            # counter; that would cause all future requests to stall,
            # since they would keep getting the same base marker that is
            # conflicting with existing messages, until the messages that
            # "won" expire, at which time we would end up reusing markers,
            # and that could make some messages invisible to an observer
            # that is querying with a marker that is larger than the ones
            # being reused.
            #
            # To mitigate this, we apply a heuristic to determine whether
            # a counter has stalled. We attempt to increment the counter,
            # but only if it hasn't been updated for a few seconds, which
            # should mean that nobody is left to update it!
            #
            # Note that we increment one at a time until the logjam is
            # broken, since we don't know how many messages were posted
            # by the worker before it crashed.
            next_marker = self._inc_counter(
                queue_name, project, window=COUNTER_STALL_WINDOW)

            # Retry the entire batch with a new sequence of markers.
            #
            # NOTE(kgriffs): Due to the unique index, and how
            # MongoDB works with batch requests, we will never
            # end up with a partially-successful update. The first
            # document in the batch will fail to insert, and the
            # remainder of the documents will not be attempted.
            if next_marker is None:
                # NOTE(kgriffs): Usually we will end up here, since
                # it should be rare that a counter becomes stalled.
                next_marker = self._get_counter(queue_name, project)
            else:
                msgtmpl = _LW(u'Detected a stalled message counter '
                              u'for queue "%(queue)s" under '
                              u'project %(project)s. '
                              u'The counter was incremented to %(value)d.')

                LOG.warning(msgtmpl,
                            dict(queue=queue_name,
                                 project=project,
                                 value=next_marker))

            for index, message in enumerate(prepared_messages):
                message['k'] = next_marker + index

        except Exception as ex:
            LOG.exception(ex)
            raise

    msgtmpl = _LW(u'Hit maximum number of attempts (%(max)s) for queue '
                  u'"%(queue)s" under project %(project)s')

    LOG.warning(msgtmpl,
                dict(max=self.driver.mongodb_conf.max_attempts,
                     queue=queue_name,
                     project=project))

    raise errors.MessageConflict(queue_name, project)
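
# NOTE: The "queries filter out any non-finalized messages" invariant
# above means readers only match documents whose transaction marker
# has been cleared. A minimal sketch of such a read, assuming the same
# single-letter field names ('k' = marker, 'tx' = transaction) and a
# hypothetical client/collection:

import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017')
msgs = client['zaqar']['messages']


def list_after(marker, limit=10):
    # Unfinalized batches (tx still set) stay invisible, which is what
    # makes the 2-phase bulk insert appear atomic to observers.
    cursor = (msgs.find({'k': {'$gt': marker}, 'tx': None})
              .sort('k', pymongo.ASCENDING)
              .limit(limit))
    return list(cursor)
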
def _inc_counter(self, queue_name, project=None, amount=1, window=None):
    """Increments the message counter and returns the new value.

    :param queue_name: Name of the queue to which the counter is scoped
    :param project: Queue's project name
    :param amount: (Default 1) Amount by which to increment the counter
    :param window: (Default None) A time window, in seconds, that
        must have elapsed since the counter was last updated, in
        order to increment the counter.

    :returns: Updated message counter value, or None if window
        was specified, and the counter has already been updated
        within the specified time period.

    :raises: storage.errors.QueueDoesNotExist
    """
    # NOTE(flaper87): If this `if` is True, it means we're
    # using a mongodb in the control plane. To avoid breaking
    # environments doing so already, we'll keep using the counter
    # in the mongodb queue_controller rather than the one in the
    # message_controller. This should go away, eventually
    if hasattr(self._queue_ctrl, '_inc_counter'):
        return self._queue_ctrl._inc_counter(queue_name, project,
                                             amount, window)

    now = timeutils.utcnow_ts()

    update = {'$inc': {'c.v': amount}, '$set': {'c.t': now}}
    query = _get_scoped_query(queue_name, project)
    if window is not None:
        threshold = now - window
        query['c.t'] = {'$lt': threshold}

    while True:
        try:
            collection = self._collection(queue_name, project).stats
            doc = collection.find_one_and_update(
                query, update,
                return_document=pymongo.ReturnDocument.AFTER,
                projection={'c.v': 1, '_id': 0})

            break
        except pymongo.errors.AutoReconnect as ex:
            LOG.exception(ex)

    if doc is None:
        if window is None:
            # NOTE(kgriffs): Since we did not filter by a time window,
            # the queue should have been found and updated. Perhaps
            # the queue has been deleted?
            message = _LW(u'Failed to increment the message '
                          u'counter for queue %(name)s and '
                          u'project %(project)s')
            message %= dict(name=queue_name, project=project)

            LOG.warning(message)

            raise errors.QueueDoesNotExist(queue_name, project)

        # NOTE(kgriffs): Assume the queue existed, but the counter
        # was recently updated, causing the range query on 'c.t' to
        # exclude the record.
        return None

    return doc['c']['v']
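
# NOTE: The window-guarded increment reduces to a single conditional
# find_one_and_update. A standalone sketch of the same idea against a
# hypothetical counters collection; the field names are assumptions.

import time

import pymongo

client = pymongo.MongoClient('mongodb://localhost:27017')
counters = client['example']['counters']


def inc_counter(name, amount=1, window=None):
    now = int(time.time())

    query = {'_id': name}
    if window is not None:
        # Only increment if untouched for the last `window` seconds
        query['t'] = {'$lt': now - window}

    doc = counters.find_one_and_update(
        query,
        {'$inc': {'v': amount}, '$set': {'t': now}},
        return_document=pymongo.ReturnDocument.AFTER)

    # None: counter missing, or updated too recently for the window
    return doc['v'] if doc else None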