def get_curtin_image(node):
    """Return boot image that supports 'xinstall' for the given node.

    The node's boot rack controller is asked for the images matching the
    node's operating system, architecture, subarchitecture and series. Among
    the returned images with purpose 'xinstall', one whose subarchitecture
    equals the node's own is preferred; if none matches exactly, the first
    'xinstall' image is returned.

    :param node: The node being deployed. Must provide `get_osystem()`,
        `get_distro_series()`, `split_arch()` and
        `get_boot_rack_controller()`.
    :return: The selected image, as returned by `get_boot_images_for()`.
    :raises ClusterUnavailable: If no RPC connection to the rack controller
        is available, or the request times out.
    :raises MissingBootImage: If no image with purpose 'xinstall' is found.
    """
    osystem = node.get_osystem()
    series = node.get_distro_series()
    arch, subarch = node.split_arch()
    rack_controller = node.get_boot_rack_controller()
    try:
        images = get_boot_images_for(
            rack_controller, osystem, arch, subarch, series)
    except (NoConnectionsAvailable, TimeoutError):
        logger.error(
            "Unable to get RPC connection for rack controller '%s' (%s)",
            rack_controller.hostname, rack_controller.system_id)
        raise ClusterUnavailable(
            "Unable to get RPC connection for rack controller '%s' (%s)" %
            (rack_controller.hostname, rack_controller.system_id))

    # Collect the candidates once so the exact-match pass and the fallback
    # below see the same images, even if `images` can only be iterated once.
    xinstall_images = [
        image for image in images if image['purpose'] == 'xinstall']

    # The images returned may include one for a newer subarchitecture that
    # also supports the requested one. Prefer the subarchitecture actually
    # being deployed. `.get()` keeps images without that key eligible for the
    # fallback instead of raising KeyError.
    for image in xinstall_images:
        if image.get('subarchitecture') == subarch:
            return image

    # No exact match: fall back to the first 'xinstall' image.
    if xinstall_images:
        return xinstall_images[0]

    raise MissingBootImage(
        "Error generating the URL of curtin's image file.  "
        "No image could be found for the given selection: "
        "os=%s, arch=%s, subarch=%s, series=%s, purpose=xinstall." % (
            osystem,
            arch,
            subarch,
            series,
        ))
def get_curtin_image(node):
    """Pick the 'xinstall' boot image used to deploy the given node.

    The node's boot rack controller is queried for images matching the
    node's operating system, architecture, subarchitecture and series. An
    'xinstall' image for exactly the node's subarchitecture wins; failing
    that, the first 'xinstall' image of any subarchitecture is returned.

    :param node: The node being deployed.
    :return: The selected image, as returned by `get_boot_images_for()`.
    :raises ClusterUnavailable: If no RPC connection to the rack controller
        is available, or the request times out.
    :raises MissingBootImage: If no image with purpose 'xinstall' is found.
    """
    osystem = node.get_osystem()
    series = node.get_distro_series()
    arch, subarch = node.split_arch()
    rack_controller = node.get_boot_rack_controller()

    try:
        images = get_boot_images_for(
            rack_controller, osystem, arch, subarch, series)
    except (NoConnectionsAvailable, TimeoutError):
        # The same text is logged and carried by the raised exception.
        template = "Unable to get RPC connection for rack controller '%s' (%s)"
        identity = (rack_controller.hostname, rack_controller.system_id)
        logger.error(template, *identity)
        raise ClusterUnavailable(template % identity)

    def supports_xinstall(candidate):
        return candidate["purpose"] == "xinstall"

    def is_exact_match(candidate):
        return (
            supports_xinstall(candidate)
            and candidate["subarchitecture"] == subarch
        )

    # A matching subarch may be a newer subarch which contains support for
    # an older one, e.g. Xenial hwe-16.04 will match for ga-16.04. Look for
    # the subarch being deployed first; only when that is missing accept a
    # newer one.
    for accepts in (is_exact_match, supports_xinstall):
        for candidate in images:
            if accepts(candidate):
                return candidate

    selection = (osystem, arch, subarch, series)
    raise MissingBootImage(
        "Error generating the URL of curtin's image file.  "
        "No image could be found for the given selection: "
        "os=%s, arch=%s, subarch=%s, series=%s, purpose=xinstall."
        % selection)
def log_exception(self, exception):
    """Log `exception` at error level: a banner line, then its traceback.

    The traceback of the exception currently being handled is logged. When
    no exception is being handled (this was called outside an `except`
    block), the traceback attached to `exception` itself is logged instead,
    so the output always describes the exception that was passed in.

    :param exception: The exception instance to report.
    """
    exc_info = sys.exc_info()
    if exc_info[0] is None:
        # No active exception: formatting (None, None, None) would only
        # yield "NoneType: None", so describe the exception we were given.
        exc_info = (
            type(exception),
            exception,
            getattr(exception, "__traceback__", None),
        )
    # The template is centred before substitution, so the banner grows with
    # the length of the message.
    logger.error(" Exception: %s ".center(79, "#") % str(exception))
    logger.error("".join(traceback.format_exception(*exc_info)))
def process_exception(self, request, exception):
    """Log `exception` at error level: a banner line, then its traceback.

    Always returns None. Presumably a middleware-style hook, in which case
    the caller carries on with its own exception handling -- confirm
    against the framework in use.

    The traceback of the exception currently being handled is logged. When
    no exception is being handled, the traceback attached to `exception`
    itself is logged instead.

    :param request: The request being processed; not used here.
    :param exception: The exception instance to report.
    """
    import sys
    import traceback

    exc_info = sys.exc_info()
    if exc_info[0] is None:
        # No active exception: formatting (None, None, None) would only
        # yield "NoneType: None", so describe the exception we were given.
        exc_info = (
            type(exception),
            exception,
            getattr(exception, "__traceback__", None),
        )
    # `str` rather than `unicode`: the latter does not exist on Python 3,
    # so the handler itself would fail with NameError while reporting.
    logger.error(" Exception: %s ".center(79, "#") % str(exception))
    logger.error(''.join(traceback.format_exception(*exc_info)))
def call_clusters(
        command, *, kwargs=None, timeout=10, controllers=None,
        ignore_errors=True, available_callback=_none,
        unavailable_callback=_none, success_callback=_none,
        failed_callback=_none, failure_callback=_none,
        timeout_callback=_none):
    """Make an RPC call to all rack controllers in parallel.

    This is a generator: no RPC connection is requested and no callback is
    invoked until the caller starts iterating over the result.

    Includes optional callbacks to report the status of the call for each
    controller. If the call was a success, the `success_callback` will be
    called immediately before the response is yielded (so that the caller can
    determine which controller was contacted successfully). A call that ends
    in a `Failure` yields nothing for that controller.

    Unless noted otherwise, each optional callback is called with a single
    argument: the `RackController` model object that corresponds to the RPC
    call.

    :param command: An :class:`amp.Command` to call on the clusters.
    :param kwargs: Optional keyword arguments to pass to the command.
        Defaults to no arguments.
    :param timeout: The maximum number of seconds to wait for responses from
        all controllers.
    :param controllers: The :class:`RackController`s on which to make the RPC
        call. If None, defaults to all :class:`RackController`s.
    :param ignore_errors: If True, errors encountered whilst calling
        `command` on the clusters won't raise an exception; they are only
        logged and reported through the callbacks.
    :param available_callback: Optional callback; called with the controller
        when an RPC connection to the controller was established.
    :param unavailable_callback: Optional callback; called with the controller
        when an RPC connection to the controller failed to be established.
    :param success_callback: Optional callback; called with the controller
        when the RPC call was a success and this method is about to yield the
        result.
    :param failed_callback: Optional callback; called with the controller if
        the RPC call fails.
    :param failure_callback: Optional callback; called with the `Failure`
        object (not the controller) if the RPC call fails.
    :param timeout_callback: Optional callback; called if the RPC call fails
        with a timeout.
    :return: A generator of results, i.e. the dicts returned by the RPC call.
    :raises: :py:class:`ClusterUnavailable` when a cluster is not connected or
        there's an error during the call, and errors are not being ignored.
    """
    # Normalise the optional keyword arguments so they can be unpacked below.
    if kwargs is None:
        kwargs = {}
    # Get the name of the RPC function for logging purposes. Each RPC function
    # is encapsulated in a `class`, so should have a corresponding
    # `commandName`. However, we don't want to crash if that isn't the case.
    command_name = (
        command.commandName.decode("ascii")
        if hasattr(command, 'commandName') else "<unknown>")
    # Maps each prepared call to the controller it targets.
    calls = {}
    if controllers is None:
        controllers = RackController.objects.all()
    for controller in controllers:
        try:
            client = getClientFor(controller.system_id)
        except NoConnectionsAvailable:
            logger.error(
                "Error while calling %s: Unable to get RPC connection for "
                "rack controller '%s' (%s).", command_name,
                controller.hostname, controller.system_id)
            unavailable_callback(controller)
            # With errors ignored, this controller is simply left out of the
            # calls made below.
            if not ignore_errors:
                raise ClusterUnavailable(
                    "Unable to get RPC connection for rack controller "
                    "'%s' (%s)" % (
                        controller.hostname, controller.system_id))
        else:
            # The call to partial() requires a `callable`, but `getClientFor()`
            # might return a `Deferred` if it runs in the reactor.
            assert callable(client), (
                "call_clusters() must not be called in the reactor thread. "
                "You probably want to use deferToDatabase().")
            available_callback(controller)
            call = partial(client, command, **kwargs)
            calls[call] = controller

    # NOTE(review): `async` is a reserved keyword from Python 3.7 onwards, so
    # the attribute access below only parses on earlier interpreters.
    for call, response in async.gatherCallResults(calls, timeout=timeout):
        # When a call returns results, figure out which controller it came from
        # and remove it from `calls`; whatever is still in `calls` once the
        # results stop arriving never produced a response.
        controller = calls[call]
        del calls[call]
        if isinstance(response, Failure):
            # Create a nice message for logging purposes. We can rely on
            # the 'type' ivar being filled in with the Exception type in a
            # Failure object, so use that to get a nice version of the name.
            exception_class = response.type.__name__
            error = str(response.value).strip()
            # Only append the exception's own text when it has any.
            if len(error) > 0:
                error = ": " + error
            human_readable_error = (
                "Exception during %s() on rack controller '%s' (%s): %s%s" % (
                    command_name, controller.hostname, controller.system_id,
                    exception_class, error))
            logger.warning(human_readable_error)
            # For failures, there are two callbacks: one for the controller
            # that failed, the second for the specific failure that occurred.
            failed_callback(controller)
            failure_callback(response)
            if not ignore_errors:
                raise ClusterUnavailable(human_readable_error)
        else:
            success_callback(controller)
            yield response
exception_class, error)) logger.warning(human_readable_error) # For failures, there are two callbacks: one for the controller # that failed, the second for the specific failure that occurred. failed_callback(controller) failure_callback(response) if not ignore_errors: raise ClusterUnavailable(human_readable_error) else: success_callback(controller) yield response # Each remaining controller [value] in the `calls` dict has timed out. for controller in calls.values(): timeout_callback(controller) logger.error( "Error while calling %s: RPC connection timed out to rack " "controller '%s' (%s).", command_name, controller.hostname, controller.system_id) def get_error_message_for_exception(exception): """Return an error message for an exception. If `exception` is a NoConnectionsAvailable error, get_error_message_for_exception() will check to see if there's a UUID listed. If so, this is an error referring to a cluster. get_error_message_for_exception() will return an error message containing the cluster's name (as opposed to its UUID), which is more useful to users. If the exception has a message attached, return that. If not, create meaningful error message for the exception and return that instead.
def process_exception(self, request, exception):
    """Log `exception` at error level: a banner line, then its traceback.

    Always returns None. Presumably a middleware-style hook, in which case
    the caller carries on with its own exception handling -- confirm
    against the framework in use.

    The traceback of the exception currently being handled is logged. When
    no exception is being handled, the traceback attached to `exception`
    itself is logged instead.

    :param request: The request being processed; not used here.
    :param exception: The exception instance to report.
    """
    import sys
    import traceback

    exc_info = sys.exc_info()
    if exc_info[0] is None:
        # No active exception: formatting (None, None, None) would only
        # yield "NoneType: None", so describe the exception we were given.
        exc_info = (
            type(exception),
            exception,
            getattr(exception, "__traceback__", None),
        )
    # `str` rather than `unicode`: the latter does not exist on Python 3,
    # so the handler itself would fail with NameError while reporting.
    logger.error(" Exception: %s ".center(79, "#") % str(exception))
    logger.error(''.join(traceback.format_exception(*exc_info)))