def _inner(): if initial_delay: greenthread.sleep(initial_delay) try: while self._running: start = _ts() self.f(*self.args, **self.kw) end = _ts() if not self._running: break delay = end - start - interval if delay > 0: LOG.warn( _LW("task %(func_name)s run outlasted " "interval by %(delay).2f sec"), {"func_name": repr(self.f), "delay": delay}, ) greenthread.sleep(-delay if delay < 0 else 0) except LoopingCallDone as e: self.stop() done.send(e.retvalue) except Exception: LOG.exception(_LE("in fixed duration looping call")) done.send_exception(*sys.exc_info()) return else: done.send(True)
def _inner(): if initial_delay: greenthread.sleep(initial_delay) try: while self._running: start = timeutils.utcnow() self.f(*self.args, **self.kw) end = timeutils.utcnow() if not self._running: break delay = interval - timeutils.delta_seconds(start, end) if delay <= 0: LOG.warn(_LW('task run outlasted interval by %s sec') % -delay) greenthread.sleep(delay if delay > 0 else 0) except LoopingCallDone as e: self.stop() done.send(e.retvalue) except Exception: LOG.exception(_LE('in fixed duration looping call')) done.send_exception(*sys.exc_info()) return else: done.send(True)
def _wait_for_exit_or_signal(self, ready_callback=None): status = None signo = 0 LOG.debug('Full set of CONF:') CONF.log_opt_values(LOG, std_logging.DEBUG) try: if ready_callback: ready_callback() super(ServiceLauncher, self).wait() except SignalExit as exc: signame = _signo_to_signame(exc.signo) LOG.info(_LI('Caught %s, exiting'), signame) status = exc.code signo = exc.signo except SystemExit as exc: status = exc.code finally: self.stop() if rpc: try: rpc.cleanup() except Exception: # We're shutting down, so it doesn't matter at this point. LOG.exception(_LE('Exception during rpc cleanup.')) return status, signo
def extract_my_subset(self, group_id, iterable): """Filters an iterable, returning only objects assigned to this agent. We have a list of objects and get a list of active group members from `tooz`. We then hash all the objects into buckets and return only the ones that hashed into *our* bucket. """ if not group_id: return iterable if group_id not in self._groups: self.join_group(group_id) try: members = self._get_members(group_id) LOG.debug('Members of group: %s', members) hr = utils.HashRing(members) filtered = [ v for v in iterable if hr.get_node(str(v)) == self._my_id ] LOG.debug('My subset: %s', filtered) return filtered except tooz.coordination.ToozError: LOG.exception( _LE('Error getting group membership info from ' 'coordination backend.')) return []
def _inner(): if initial_delay: greenthread.sleep(initial_delay) try: while self._running: idle = self.f(*self.args, **self.kw) if not self._running: break if periodic_interval_max is not None: idle = min(idle, periodic_interval_max) LOG.debug( "Dynamic looping call %(func_name)s sleeping " "for %(idle).02f seconds", {"func_name": repr(self.f), "idle": idle}, ) greenthread.sleep(idle) except LoopingCallDone as e: self.stop() done.send(e.retvalue) except Exception: LOG.exception(_LE("in dynamic looping call")) done.send_exception(*sys.exc_info()) return else: done.send(True)
def _inner(): if initial_delay: greenthread.sleep(initial_delay) try: while self._running: idle = self.f(*self.args, **self.kw) if not self._running: break if periodic_interval_max is not None: idle = min(idle, periodic_interval_max) LOG.debug( 'Dynamic looping call %(func_name)s sleeping ' 'for %(idle).02f seconds', { 'func_name': repr(self.f), 'idle': idle }) greenthread.sleep(idle) except LoopingCallDone as e: self.stop() done.send(e.retvalue) except Exception: LOG.exception(_LE('in dynamic looping call')) done.send_exception(*sys.exc_info()) return else: done.send(True)
def inner_func(*args, **kwargs): last_log_time = 0 last_exc_message = None exc_count = 0 while True: try: return infunc(*args, **kwargs) except Exception as exc: this_exc_message = six.u(str(exc)) if this_exc_message == last_exc_message: exc_count += 1 else: exc_count = 1 # Do not log any more frequently than once a minute unless # the exception message changes cur_time = int(time.time()) if (cur_time - last_log_time > 60 or this_exc_message != last_exc_message): logging.exception( _LE('Unexpected exception occurred %d time(s)... ' 'retrying.') % exc_count) last_log_time = cur_time last_exc_message = this_exc_message exc_count = 0 # This should be a very rare event. In case it isn't, do # a sleep. time.sleep(1)
def _migrate_up(self, engine, version, with_data=False): """migrate up to a new version of the db. We allow for data insertion and post checks at every migration version with special _pre_upgrade_### and _check_### functions in the main test. """ # NOTE(sdague): try block is here because it's impossible to debug # where a failed data migration happens otherwise try: if with_data: data = None pre_upgrade = getattr(self, "_pre_upgrade_%03d" % version, None) if pre_upgrade: data = pre_upgrade(engine) self.migration_api.upgrade(engine, self.REPOSITORY, version) self.assertEqual(version, self.migration_api.db_version(engine, self.REPOSITORY)) if with_data: check = getattr(self, "_check_%03d" % version, None) if check: check(engine, data) except Exception: LOG.error( _LE("Failed to migrate to version %(version)s " "on engine %(engine)s") % {"version": version, "engine": engine} ) raise
def wrapped(*args, **kwargs): try: return fn(*args, **kwargs) except Exception as e: LOG.exception( _LE('An exception occurred processing ' 'the API call: %s ') % e)
def _wrap(self, *args, **kwargs): try: assert issubclass( self.__class__, sqlalchemy.orm.session.Session ), ('_wrap_db_error() can only be applied to methods of ' 'subclasses of sqlalchemy.orm.session.Session.') return f(self, *args, **kwargs) except UnicodeEncodeError: raise exception.DBInvalidUnicodeParameter() except sqla_exc.OperationalError as e: _raise_if_db_connection_lost(e, self.bind) _raise_if_deadlock_error(e, self.bind.dialect.name) # NOTE(comstud): A lot of code is checking for OperationalError # so let's not wrap it for now. raise # note(boris-42): We should catch unique constraint violation and # wrap it by our own DBDuplicateEntry exception. Unique constraint # violation is wrapped by IntegrityError. except sqla_exc.IntegrityError as e: # note(boris-42): SqlAlchemy doesn't unify errors from different # DBs so we must do this. Also in some tables (for example # instance_types) there are more than one unique constraint. This # means we should get names of columns, which values violate # unique constraint, from error message. _raise_if_duplicate_entry_error(e, self.bind.dialect.name) raise exception.DBError(e) except Exception as e: LOG.exception(_LE('DB exception wrapped.')) raise exception.DBError(e)
def _wrap(self, *args, **kwargs): try: assert issubclass( self.__class__, sqlalchemy.orm.session.Session), ( '_wrap_db_error() can only be applied to methods of ' 'subclasses of sqlalchemy.orm.session.Session.') return f(self, *args, **kwargs) except UnicodeEncodeError: raise exception.DBInvalidUnicodeParameter() except sqla_exc.OperationalError as e: _raise_if_db_connection_lost(e, self.bind) _raise_if_deadlock_error(e, self.bind.dialect.name) # NOTE(comstud): A lot of code is checking for OperationalError # so let's not wrap it for now. raise # note(boris-42): We should catch unique constraint violation and # wrap it by our own DBDuplicateEntry exception. Unique constraint # violation is wrapped by IntegrityError. except sqla_exc.IntegrityError as e: # note(boris-42): SqlAlchemy doesn't unify errors from different # DBs so we must do this. Also in some tables (for example # instance_types) there are more than one unique constraint. This # means we should get names of columns, which values violate # unique constraint, from error message. _raise_if_duplicate_entry_error(e, self.bind.dialect.name) raise exception.DBError(e) except Exception as e: LOG.exception(_LE('DB exception wrapped.')) raise exception.DBError(e)
def _process_data(self, ctxt, version, method, namespace, args): """Process a message in a new thread. If the proxy object we have has a dispatch method (see rpc.dispatcher.RpcDispatcher), pass it the version, method, and args and let it dispatch as appropriate. If not, use the old behavior of magically calling the specified method on the proxy we have here. """ ctxt.update_store() try: rval = self.proxy.dispatch(ctxt, version, method, namespace, **args) # Check if the result was a generator if inspect.isgenerator(rval): for x in rval: ctxt.reply(x, None, connection_pool=self.connection_pool) else: ctxt.reply(rval, None, connection_pool=self.connection_pool) # This final None tells multicall that it is done. ctxt.reply(ending=True, connection_pool=self.connection_pool) except rpc_common.ClientException as e: LOG.debug('Expected exception during message handling (%s)' % e._exc_info[1]) ctxt.reply(None, e._exc_info, connection_pool=self.connection_pool, log_failure=False) except Exception: # sys.exc_info() is deleted by LOG.exception(). exc_info = sys.exc_info() LOG.error(_LE('Exception during message handling'), exc_info=exc_info) ctxt.reply(None, exc_info, connection_pool=self.connection_pool)
def _inner(): if initial_delay: greenthread.sleep(initial_delay) try: while self._running: start = _ts() self.f(*self.args, **self.kw) end = _ts() if not self._running: break delay = end - start - interval if delay > 0: LOG.warn( _LW('task %(func_name)s run outlasted ' 'interval by %(delay).2f sec'), { 'func_name': repr(self.f), 'delay': delay }) greenthread.sleep(-delay if delay < 0 else 0) except LoopingCallDone as e: self.stop() done.send(e.retvalue) except Exception: LOG.exception(_LE('in fixed duration looping call')) done.send_exception(*sys.exc_info()) return else: done.send(True)
def _migrate_up(self, engine, version, with_data=False): """migrate up to a new version of the db. We allow for data insertion and post checks at every migration version with special _pre_upgrade_### and _check_### functions in the main test. """ # NOTE(sdague): try block is here because it's impossible to debug # where a failed data migration happens otherwise try: if with_data: data = None pre_upgrade = getattr(self, "_pre_upgrade_%03d" % version, None) if pre_upgrade: data = pre_upgrade(engine) self.migration_api.upgrade(engine, self.REPOSITORY, version) self.assertEqual( version, self.migration_api.db_version(engine, self.REPOSITORY)) if with_data: check = getattr(self, "_check_%03d" % version, None) if check: check(engine, data) except Exception: LOG.error( _LE("Failed to migrate to version %s on engine %s") % (version, engine)) raise
def release(self): try: self.unlock() self.lockfile.close() LOG.debug('Released file lock "%s"', self.fname) except IOError: LOG.exception(_LE("Could not release the acquired lock `%s`"), self.fname)
def _error_callback(exc): if isinstance(exc, qpid_exceptions.Empty): LOG.debug('Timed out waiting for RPC response: %s' % str(exc)) raise rpc_common.Timeout() else: LOG.exception(_LE('Failed to consume message from queue: %s') % str(exc))
def __call__(self, req): try: response = req.get_response(self.application) except Exception: LOG.exception(_LE('An error occurred during ' 'processing the request: %s')) response = webob.exc.HTTPInternalServerError() return response
def _error_callback(exc): if isinstance(exc, socket.timeout): LOG.debug('Timed out waiting for RPC response: %s' % str(exc)) raise rpc_common.Timeout() else: LOG.exception(_LE('Failed to consume message from queue: %s') % str(exc)) info['do_consume'] = True
def heartbeat(self): if self._coordinator: if not self._started: # re-connect self.start() try: self._coordinator.heartbeat() except tooz.coordination.ToozError: LOG.exception(_LE('Error sending a heartbeat to coordination ' 'backend.'))
def __exit__(self, exc_type, exc_val, exc_tb): if exc_type is not None: if self.reraise: logging.error(_LE('Original exception being dropped: %s'), traceback.format_exception(self.type_, self.value, self.tb)) return False if self.reraise: six.reraise(self.type_, self.value, self.tb)
def __exit__(self, exc_type, exc_val, exc_tb): if exc_type is not None: if self.reraise: logging.error( _LE('Original exception being dropped: %s'), traceback.format_exception(self.type_, self.value, self.tb)) return False if self.reraise: six.reraise(self.type_, self.value, self.tb)
def wrapper(*args, **kwargs): next_interval = self.retry_interval remaining = self.max_retries while True: try: return f(*args, **kwargs) except exception.DBConnectionError as e: if remaining == 0: LOG.exception(_LE("DB exceeded retry limit.")) raise exception.DBError(e) if remaining != -1: remaining -= 1 LOG.exception(_LE("DB connection error.")) # NOTE(vsergeyev): We are using patched time module, so # this effectively yields the execution # context to another green thread. time.sleep(next_interval) if self.inc_retry_interval: next_interval = min(next_interval * 2, self.max_retry_interval)
def notify(context, publisher_id, event_type, priority, payload): """Sends a notification using the specified driver :param publisher_id: the source worker_type.host of the message :param event_type: the literal type of event (ex. Instance Creation) :param priority: patterned after the enumeration of Python logging levels in the set (DEBUG, WARN, INFO, ERROR, CRITICAL) :param payload: A python dictionary of attributes Outgoing message format includes the above parameters, and appends the following: message_id a UUID representing the id for this notification timestamp the GMT timestamp the notification was sent at The composite message will be constructed as a dictionary of the above attributes, which will then be sent via the transport mechanism defined by the driver. Message example:: {'message_id': str(uuid.uuid4()), 'publisher_id': 'compute.host1', 'timestamp': timeutils.utcnow(), 'priority': 'WARN', 'event_type': 'compute.create_instance', 'payload': {'instance_id': 12, ... }} """ if priority not in log_levels: raise BadPriorityException( _('%s not in valid priorities') % priority) # Ensure everything is JSON serializable. payload = jsonutils.to_primitive(payload, convert_instances=True) msg = dict(message_id=str(uuid.uuid4()), publisher_id=publisher_id, event_type=event_type, priority=priority, payload=payload, timestamp=str(timeutils.utcnow())) for driver in _get_drivers(): try: driver.notify(context, msg) except Exception as e: LOG.exception(_LE("Problem '%(e)s' attempting to " "send to notification system. " "Payload=%(payload)s") % dict(e=e, payload=payload))
def start(self): backend_url = cfg.CONF.coordination.backend_url if backend_url: try: self._coordinator = tooz.coordination.get_coordinator( backend_url, self._my_id) self._coordinator.start() self._started = True LOG.info(_LI('Coordination backend started successfully.')) except tooz.coordination.ToozError: self._started = False LOG.exception(_LE('Error connecting to coordination backend.'))
def consume(self): """Fetch the message and pass it to the callback object.""" message = self.receiver.fetch() try: self._unpack_json_msg(message) msg = rpc_common.deserialize_msg(message.content) self.callback(msg) except Exception: LOG.exception(_LE("Failed to process message... skipping it.")) finally: # TODO(sandy): Need support for optional ack_on_error. self.session.acknowledge(message)
def reconnect(self): """Handles reconnecting and re-establishing queues. Will retry up to self.max_retries number of times. self.max_retries = 0 means to retry forever. Sleep between tries, starting at self.interval_start seconds, backing off self.interval_stepping number of seconds each attempt. """ attempt = 0 while True: params = self.params_list[next(self.next_broker_indices)] attempt += 1 try: self._connect(params) return except (IOError, self.connection_errors) as e: pass except Exception as e: # NOTE(comstud): Unfortunately it's possible for amqplib # to return an error not covered by its transport # connection_errors in the case of a timeout waiting for # a protocol response. (See paste link in LP888621) # So, we check all exceptions for 'timeout' in them # and try to reconnect in this case. if 'timeout' not in str(e): raise log_info = {} log_info['err_str'] = str(e) log_info['max_retries'] = self.max_retries log_info.update(params) if self.max_retries and attempt == self.max_retries: msg = _('Unable to connect to AMQP server on ' '%(hostname)s:%(port)d after %(max_retries)d ' 'tries: %(err_str)s') % log_info LOG.error(msg) raise rpc_common.RPCException(msg) if attempt == 1: sleep_time = self.interval_start or 1 elif attempt > 1: sleep_time += self.interval_stepping if self.interval_max: sleep_time = min(sleep_time, self.interval_max) log_info['sleep_time'] = sleep_time LOG.error(_LE('AMQP server on %(hostname)s:%(port)d is ' 'unreachable: %(err_str)s. Trying again in ' '%(sleep_time)d seconds.') % log_info) time.sleep(sleep_time)
def wrapper(*args, **kwargs): next_interval = self.retry_interval remaining = self.max_retries while True: try: return f(*args, **kwargs) except exception.DBConnectionError as e: if remaining == 0: LOG.exception(_LE('DB exceeded retry limit.')) raise exception.DBError(e) if remaining != -1: remaining -= 1 LOG.exception(_LE('DB connection error.')) # NOTE(vsergeyev): We are using patched time module, so # this effectively yields the execution # context to another green thread. time.sleep(next_interval) if self.inc_retry_interval: next_interval = min( next_interval * 2, self.max_retry_interval )
def _get_drivers(): """Instantiate, cache, and return drivers based on the CONF.""" global _drivers if _drivers is None: _drivers = {} for notification_driver in CONF.notification_driver: try: driver = importutils.import_module(notification_driver) _drivers[notification_driver] = driver except ImportError: LOG.exception(_LE("Failed to load notifier %s. " "These notifications will not be sent.") % notification_driver) return _drivers.values()
def _callback_handler(self, message, callback): """Call callback with deserialized message. Messages that are processed without exception are ack'ed. If the message processing generates an exception, it will be ack'ed if ack_on_error=True. Otherwise it will be .requeue()'ed. """ try: msg = rpc_common.deserialize_msg(message.payload) callback(msg) except Exception: if self.ack_on_error: LOG.exception(_LE("Failed to process message" " ... skipping it.")) message.ack() else: LOG.exception(_LE("Failed to process message" " ... will requeue.")) message.requeue() else: message.ack()
def notify(context, message): """Sends a notification via RPC.""" if not context: context = req_context.get_admin_context() priority = message.get('priority', CONF.default_notification_level) priority = priority.lower() for topic in CONF.notification_topics: topic = '%s.%s' % (topic, priority) try: rpc.notify(context, topic, message) except Exception: LOG.exception(_LE("Could not send notification to %(topic)s. " "Payload=%(message)s"), {"topic": topic, "message": message})
def _parse_check(rule): """Parse a single base check rule into an appropriate Check object.""" # Handle the special checks if rule == '!': return FalseCheck() elif rule == '@': return TrueCheck() try: kind, match = rule.split(':', 1) except Exception: LOG.exception(_LE("Failed to understand rule %s") % rule) # If the rule is invalid, we'll fail closed return FalseCheck() # Find what implements the check if kind in _checks: return _checks[kind](kind, match) elif None in _checks: return _checks[None](kind, match) else: LOG.error(_LE("No handler for matches of kind %s") % kind) return FalseCheck()
def _parse_check(rule): """Parse a single base check rule into an appropriate Check object.""" # Handle the special checks if rule == "!": return FalseCheck() elif rule == "@": return TrueCheck() try: kind, match = rule.split(":", 1) except Exception: LOG.exception(_LE("Failed to understand rule %s") % rule) # If the rule is invalid, we'll fail closed return FalseCheck() # Find what implements the check if kind in _checks: return _checks[kind](kind, match) elif None in _checks: return _checks[None](kind, match) else: LOG.error(_LE("No handler for matches of kind %s") % kind) return FalseCheck()
def _handler(self, ctxt, publisher_id, event_type, payload, metadata): """RPC endpoint for notification messages When another service sends a notification over the message bus, this method receives it. """ try: LOG.debug('calling endpoint _handler') self.dispatcher_manager.map_method(self.method, payload) except Exception: if self.requeue_on_error: LOG.exception(_LE("Dispatcher failed to handle the %s, " "requeue it."), self.priority) return oslo.messaging.NotificationResult.REQUEUE raise
def sample(self, ctxt, publisher_id, event_type, payload, metadata): """RPC endpoint for notification messages When another service sends a notification over the message bus, this method receives it. """ try: self.dispatcher_manager.map_method('record_metering_data', data=payload) except Exception: if cfg.CONF.collector.requeue_sample_on_dispatcher_error: LOG.exception(_LE("Dispatcher failed to handle the sample, " "requeue it.")) return oslo.messaging.NotificationResult.REQUEUE raise
def __init__(self, message=None, **kwargs): self.kwargs = kwargs if not message: try: message = self.msg_fmt % kwargs except Exception: # kwargs doesn't match a variable in the message # log the issue and the kwargs LOG.exception(_LE('Exception in string format operation')) for name, value in six.iteritems(kwargs): LOG.error("%s: %s" % (name, value)) # at least get the core message out if something happened message = self.msg_fmt super(RPCException, self).__init__(message)
def reconnect(self): """Handles reconnecting and re-establishing sessions and queues.""" delay = 1 while True: # Close the session if necessary if self.connection.opened(): try: self.connection.close() except qpid_exceptions.ConnectionError: pass broker = self.brokers[next(self.next_broker_indices)] try: self.connection_create(broker) self.connection.open() except qpid_exceptions.ConnectionError as e: msg_dict = dict(e=e, delay=delay) msg = _LE("Unable to connect to AMQP server: %(e)s. " "Sleeping %(delay)s seconds") % msg_dict LOG.error(msg) time.sleep(delay) delay = min(delay + 1, 5) else: LOG.info(_LI('Connected to AMQP server on %s'), broker) break self.session = self.connection.session() if self.consumers: consumers = self.consumers self.consumers = {} for consumer in six.itervalues(consumers): consumer.reconnect(self.session) self._register_consumer(consumer) LOG.debug("Re-established AMQP queues")
def serialize_remote_exception(failure_info, log_failure=True): """Prepares exception data to be sent over rpc. Failure_info should be a sys.exc_info() tuple. """ tb = traceback.format_exception(*failure_info) failure = failure_info[1] if log_failure: LOG.error(_LE("Returning exception %s to caller"), six.text_type(failure)) LOG.error(tb) kwargs = {} if hasattr(failure, 'kwargs'): kwargs = failure.kwargs # NOTE(matiu): With cells, it's possible to re-raise remote, remote # exceptions. Lets turn it back into the original exception type. cls_name = str(failure.__class__.__name__) mod_name = str(failure.__class__.__module__) if (cls_name.endswith(_REMOTE_POSTFIX) and mod_name.endswith(_REMOTE_POSTFIX)): cls_name = cls_name[:-len(_REMOTE_POSTFIX)] mod_name = mod_name[:-len(_REMOTE_POSTFIX)] data = { 'class': cls_name, 'module': mod_name, 'message': six.text_type(failure), 'tb': tb, 'args': failure.args, 'kwargs': kwargs } json_data = jsonutils.dumps(data) return json_data
def extract_my_subset(self, group_id, iterable): """Filters an iterable, returning only objects assigned to this agent. We have a list of objects and get a list of active group members from `tooz`. We then hash all the objects into buckets and return only the ones that hashed into *our* bucket. """ if not group_id: return iterable if group_id not in self._groups: self.join_group(group_id) try: members = self._get_members(group_id) LOG.debug('Members of group: %s', members) hr = utils.HashRing(members) filtered = [v for v in iterable if hr.get_node(str(v)) == self._my_id] LOG.debug('My subset: %s', filtered) return filtered except tooz.coordination.ToozError: LOG.exception(_LE('Error getting group membership info from ' 'coordination backend.')) return []
def _child_wait_for_exit_or_signal(self, launcher): status = 0 signo = 0 # NOTE(johannes): All exceptions are caught to ensure this # doesn't fallback into the loop spawning children. It would # be bad for a child to spawn more children. try: launcher.wait() except SignalExit as exc: signame = _signo_to_signame(exc.signo) LOG.info(_LI('Child caught %s, exiting'), signame) status = exc.code signo = exc.signo except SystemExit as exc: status = exc.code except BaseException: LOG.exception(_LE('Unhandled exception')) status = 2 finally: launcher.stop() return status, signo
def _parse_text_rule(rule): """Parses policy to the tree. Translates a policy written in the policy language into a tree of Check objects. """ # Empty rule means always accept if not rule: return TrueCheck() # Parse the token stream state = ParseState() for tok, value in _parse_tokenize(rule): state.shift(tok, value) try: return state.result except ValueError: # Couldn't parse the rule LOG.exception(_LE("Failed to understand rule %r") % rule) # Fail closed return FalseCheck()