Example #1
0
def get_curtin_image(node):
    """Find the 'xinstall' boot image to use for `node`.

    The rack controller returned by `node.get_boot_rack_controller()` is
    queried through `get_boot_images_for` using the node's operating
    system, architecture, sub-architecture and series. The first image
    in the result whose 'purpose' is 'xinstall' is returned.

    :param node: The node to find an image for.
    :return: The first image with purpose 'xinstall'.
    :raises ClusterUnavailable: If fetching the images fails with
        `NoConnectionsAvailable` or `TimeoutError`.
    :raises MissingBootImage: If no image has purpose 'xinstall'.
    """
    osystem = node.get_osystem()
    series = node.get_distro_series()
    arch, subarch = node.split_arch()
    rack_controller = node.get_boot_rack_controller()
    try:
        candidates = get_boot_images_for(
            rack_controller, osystem, arch, subarch, series)
    except (NoConnectionsAvailable, TimeoutError):
        # The log entry and the raised error carry the same wording.
        template = (
            "Unable to get RPC connection for rack controller '%s' (%s)")
        identity = (rack_controller.hostname, rack_controller.system_id)
        logger.error(template, *identity)
        raise ClusterUnavailable(template % identity)

    xinstall_image = next(
        (candidate for candidate in candidates
         if candidate['purpose'] == 'xinstall'),
        None)
    if xinstall_image is not None:
        return xinstall_image

    selection = (osystem, arch, subarch, series)
    raise MissingBootImage(
        "Error generating the URL of curtin's image file.  "
        "No image could be found for the given selection: "
        "os=%s, arch=%s, subarch=%s, series=%s, purpose=xinstall."
        % selection)
Example #2
0
def get_curtin_image(node):
    """Find the 'xinstall' boot image to use for `node`.

    The rack controller returned by `node.get_boot_rack_controller()` is
    queried through `get_boot_images_for` using the node's operating
    system, architecture, sub-architecture and series. An 'xinstall'
    image whose sub-architecture equals the node's is preferred; failing
    that, the first 'xinstall' image of any sub-architecture is used.

    :param node: The node to find an image for.
    :return: The selected image.
    :raises ClusterUnavailable: If fetching the images fails with
        `NoConnectionsAvailable` or `TimeoutError`.
    :raises MissingBootImage: If no image has purpose 'xinstall'.
    """
    osystem = node.get_osystem()
    series = node.get_distro_series()
    arch, subarch = node.split_arch()
    rack_controller = node.get_boot_rack_controller()
    try:
        candidates = get_boot_images_for(
            rack_controller, osystem, arch, subarch, series)
    except (NoConnectionsAvailable, TimeoutError):
        # The log entry and the raised error carry the same wording.
        template = (
            "Unable to get RPC connection for rack controller '%s' (%s)")
        identity = (rack_controller.hostname, rack_controller.system_id)
        logger.error(template, *identity)
        raise ClusterUnavailable(template % identity)

    # The images returned may include a newer subarch that also supports
    # an older one (e.g. Xenial hwe-16.04 matches for ga-16.04). Look for
    # the exact subarch being deployed first; only when that is absent
    # accept whichever xinstall image comes first.
    exact_match = next(
        (candidate for candidate in candidates
         if candidate["purpose"] == "xinstall"
         and candidate["subarchitecture"] == subarch),
        None)
    if exact_match is not None:
        return exact_match

    any_xinstall = next(
        (candidate for candidate in candidates
         if candidate["purpose"] == "xinstall"),
        None)
    if any_xinstall is not None:
        return any_xinstall

    selection = (osystem, arch, subarch, series)
    raise MissingBootImage(
        "Error generating the URL of curtin's image file.  "
        "No image could be found for the given selection: "
        "os=%s, arch=%s, subarch=%s, series=%s, purpose=xinstall."
        % selection)
Example #3
0
 def log_exception(self, exception):
     """Log `exception` under a '#' banner, followed by a traceback.

     The traceback comes from `sys.exc_info()`, i.e. the exception that
     is being handled when this method runs, not from `exception` itself.
     """
     exc_type, exc_value, exc_tb = sys.exc_info()
     # The template is padded to 79 columns before the message is filled in.
     banner = " Exception: %s ".center(79, "#")
     logger.error(banner % str(exception))
     trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
     logger.error("".join(trace_lines))
Example #4
0
 def process_exception(self, request, exception):
     """Log `exception` under a '#' banner, followed by a traceback.

     `request` is not used. The traceback comes from `sys.exc_info()`,
     i.e. the exception that is being handled when this method runs.
     Nothing is returned explicitly.
     """
     import sys
     import traceback
     exc_type, exc_value, exc_tb = sys.exc_info()
     # The template is padded to 79 columns before the message is filled in.
     banner = " Exception: %s ".center(79, "#")
     logger.error(banner % unicode(exception))
     trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
     logger.error(''.join(trace_lines))
Example #5
0
def call_clusters(
        command, *, kwargs=None, timeout=10, controllers=None,
        ignore_errors=True, available_callback=_none,
        unavailable_callback=_none, success_callback=_none,
        failed_callback=_none, failure_callback=_none, timeout_callback=_none):
    """Make an RPC call to all rack controllers in parallel.

    Includes optional callbacks to report the status of the call for each
    controller. If the call was a success, the `success_callback` will be
    called immediately before the response is yielded (so that the caller
    can determine which controller was contacted successfully).

    All optional callbacks are called with a single argument: the
    `RackController` model object that corresponds to the RPC call.

    :param controllers: The :class:`RackController`s on which to make the RPC
        call. If None, defaults to all :class:`RackController`s.
    :param timeout: The maximum number of seconds to wait for responses from
        all controllers.
    :param command: An :class:`amp.Command` to call on the clusters.
    :param ignore_errors: If True, errors encountered whilst calling
        `command` on the clusters won't raise an exception.
    :param available_callback: Optional callback; called with the controller
        when an RPC connection to the controller was established.
    :param unavailable_callback: Optional callback; called with the controller
        when an RPC connection to the controller failed to be established.
    :param success_callback: Optional callback; called with the controller
        when the RPC call was a success and this method is about to yield the
        result.
    :param failed_callback: Optional callback; called with the controller if
        the RPC call fails.
    :param failure_callback: Optional callback; called with the `Failure`
        object if the RPC call fail with a well-known exception.
    :param timeout_callback: Optional callback; called if the RPC call
        fails with a timeout.
    :param kwargs: Optional keyword arguments to pass to the command
    :return: A generator of results, i.e. the dicts returned by the RPC
        call.
    :raises: :py:class:`ClusterUnavailable` when a cluster is not
        connected or there's an error during the call, and errors are
        not being ignored.
    """
    # Get the name of the RPC function for logging purposes. Each RPC function
    # is enacapsulated in a `class`, so should have a corresponding `__name__`.
    # However, we don't want to crash if that isn't the case.
    if kwargs is None:
        kwargs = {}
    command_name = (
        command.commandName.decode("ascii") if hasattr(command, 'commandName')
        else "<unknown>")
    calls = {}
    if controllers is None:
        controllers = RackController.objects.all()
    for controller in controllers:
        try:
            client = getClientFor(controller.system_id)
        except NoConnectionsAvailable:
            logger.error(
                "Error while calling %s: Unable to get RPC connection for "
                "rack controller '%s' (%s).", command_name,
                controller.hostname, controller.system_id)
            unavailable_callback(controller)
            if not ignore_errors:
                raise ClusterUnavailable(
                    "Unable to get RPC connection for rack controller "
                    "'%s' (%s)" % (
                        controller.hostname, controller.system_id))
        else:
            # The call to partial() requires a `callable`, but `getClientFor()`
            # might return a `Deferred` if it runs in the reactor.
            assert callable(client), (
                "call_clusters() must not be called in the reactor thread. "
                "You probably want to use deferToDatabase().")
            available_callback(controller)
            call = partial(client, command, **kwargs)
            calls[call] = controller

    for call, response in async.gatherCallResults(calls, timeout=timeout):
        # When a call returns results, figure out which controller it came from
        # and remove it from the list, so we can report which controllers
        # timed out.
        controller = calls[call]
        del calls[call]
        if isinstance(response, Failure):
            # Create a nice message for logging purposes. We can rely on
            # the 'type' ivar being filled in with the Exception type in a
            # Failure object, so use that to get a nice version of the name.
            exception_class = response.type.__name__
            error = str(response.value).strip()
            if len(error) > 0:
                error = ": " + error
            human_readable_error = (
                "Exception during %s() on rack controller '%s' (%s): %s%s" % (
                    command_name, controller.hostname, controller.system_id,
                    exception_class, error))
            logger.warning(human_readable_error)
            # For failures, there are two callbacks: one for the controller
            # that failed, the second for the specific failure that occurred.
            failed_callback(controller)
            failure_callback(response)
            if not ignore_errors:
                raise ClusterUnavailable(human_readable_error)
        else:
            success_callback(controller)
            yield response
Example #6
0
                    exception_class, error))
            logger.warning(human_readable_error)
            # For failures, there are two callbacks: one for the controller
            # that failed, the second for the specific failure that occurred.
            failed_callback(controller)
            failure_callback(response)
            if not ignore_errors:
                raise ClusterUnavailable(human_readable_error)
        else:
            success_callback(controller)
            yield response
    # Each remaining controller [value] in the `calls` dict has timed out.
    for controller in calls.values():
        timeout_callback(controller)
        logger.error(
            "Error while calling %s: RPC connection timed out to rack "
            "controller '%s' (%s).", command_name, controller.hostname,
            controller.system_id)


def get_error_message_for_exception(exception):
    """Return an error message for an exception.

    If `exception` is a NoConnectionsAvailable error,
    get_error_message_for_exception() will check to see if there's a
    UUID listed. If so, this is an error referring to a cluster.
    get_error_message_for_exception() will return an error message
    containing the cluster's name (as opposed to its UUID), which is
    more useful to users.

    If the exception has a message attached, return that. If not, create
    meaningful error message for the exception and return that instead.
Example #7
0
 def process_exception(self, request, exception):
     """Log `exception` under a '#' banner, followed by a traceback.

     `request` is not used. The traceback comes from `sys.exc_info()`,
     i.e. the exception that is being handled when this method runs.
     Nothing is returned explicitly.
     """
     import sys
     import traceback
     exc_type, exc_value, exc_tb = sys.exc_info()
     # The template is padded to 79 columns before the message is filled in.
     banner = " Exception: %s ".center(79, "#")
     logger.error(banner % unicode(exception))
     trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
     logger.error(''.join(trace_lines))