Example #1
    def __call__(self, node):
        if node.instance_id:
            LOG.debug('Node %s is already reserved', _utils.log_res(node))
            return False

        if node.is_maintenance:
            LOG.debug('Node %s is in maintenance', _utils.log_res(node))
            return False

        if (self.resource_class is not None
                and node.resource_class != self.resource_class):
            LOG.debug(
                'Resource class %(real)s does not match the expected '
                'value of %(exp)s for node %(node)s', {
                    'node': _utils.log_res(node),
                    'exp': self.resource_class,
                    'real': node.resource_class
                })
            return False

        if (self.conductor_group is not None
                and node.conductor_group != self.conductor_group):
            LOG.debug(
                'Conductor group %(real)s does not match the expected '
                'value of %(exp)s for node %(node)s', {
                    'node': _utils.log_res(node),
                    'exp': self.conductor_group,
                    'real': node.conductor_group
                })
            return False

        return True
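
The __call__ above turns the filter into a plain predicate over nodes. A minimal usage sketch, where node_filter and connection are illustrative names for an instance of this filter class and an openstacksdk connection:

# Sketch only: keep the nodes that pass the reservation, maintenance,
# resource-class and conductor-group checks implemented above.
nodes = list(connection.baremetal.nodes(details=True))
candidates = [node for node in nodes if node_filter(node)]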
Example #2
    def _check_and_update_allocation_for_node(self, node, hostname=None):
        # No allocation with given hostname, find one corresponding to the
        # node.
        allocation = self.connection.baremetal.get_allocation(
            node.allocation_id)
        if allocation.name and hostname and allocation.name != hostname:
            # Prevent updating of an existing hostname, since we don't
            # understand the intention
            raise exceptions.InvalidNode(
                "Allocation %(alloc)s associated with node %(node)s "
                "uses hostname %(old)s that does not match the expected "
                "hostname %(new)s" % {
                    'alloc': _utils.log_res(allocation),
                    'node': _utils.log_res(node),
                    'old': allocation.name,
                    'new': hostname
                })
        elif not allocation.name and not self._dry_run:
            if not hostname:
                hostname = _utils.default_hostname(node)
            # Set the hostname that was not set in reserve_node.
            LOG.debug(
                'Updating allocation %(alloc)s for node '
                '%(node)s with hostname %(host)s', {
                    'alloc': _utils.log_res(allocation),
                    'node': _utils.log_res(node),
                    'host': hostname
                })
            allocation = self.connection.baremetal.update_allocation(
                allocation, name=hostname)

        return allocation
Example #3
    def create_and_attach_ports(self):
        """Attach ports to the node, creating them if requested."""
        self.validate()

        for nic_type, nic in self._validated:
            if nic_type != 'port':
                # The 'binding:host_id' must be set to ensure IP allocation
                # is not deferred.
                # See: https://storyboard.openstack.org/#!/story/2009715
                port = self._connection.network.create_port(
                    binding_host_id=self._node.id, **nic)
                self.created_ports.append(port.id)
                LOG.info(
                    'Created port %(port)s for node %(node)s with '
                    '%(nic)s', {
                        'port': _utils.log_res(port),
                        'node': _utils.log_res(self._node),
                        'nic': nic
                    })
            else:
                # The 'binding:host_id' must be set to ensure IP allocation
                # is not deferred.
                # See: https://storyboard.openstack.org/#!/story/2009715
                self._connection.network.update_port(
                    nic, binding_host_id=self._node.id)
                port = nic

            self._connection.baremetal.attach_vif_to_node(self._node, port.id)
            LOG.info('Attached port %(port)s to node %(node)s', {
                'port': _utils.log_res(port),
                'node': _utils.log_res(self._node)
            })
            self.attached_ports.append(port.id)
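
For context, a rough sketch of driving this helper from the outside. The constructor call mirrors the one in Example #10 (_nics.NICs(connection, node, nics, hostname=...)), the nic dict shapes follow the provision_node docstring, and all values are placeholders:

# Placeholder values; the NICs constructor signature is taken from Example #10.
nics = _nics.NICs(connection, node,
                  [{'port': '<existing-port-uuid>'},
                   {'network': 'private', 'fixed_ip': '10.0.0.42'}],
                  hostname='compute-1')
nics.create_and_attach_ports()  # validates, then creates and attaches ports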
Example #4
    def undeploy(self, node):
        """Output result of undeploy."""
        if node.provision_state == 'available':
            message = "Successfully unprovisioned node %(node)s"
        else:
            message = "Unprovisioning started for node %(node)s"

        _print(message, node=_utils.log_res(node))
Example #5
    def __call__(self, node):
        if not self._capabilities:
            return True

        try:
            caps = _utils.get_capabilities(node)
        except Exception:
            LOG.exception(
                'Malformed capabilities on node %(node)s: %(caps)s', {
                    'node': _utils.log_res(node),
                    'caps': node.properties.get('capabilities')
                })
            return False

        LOG.debug('Capabilities for node %(node)s: %(caps)s', {
            'node': _utils.log_res(node),
            'caps': caps
        })
        for key, value in self._capabilities.items():
            try:
                node_value = caps[key]
            except KeyError:
                LOG.debug('Node %(node)s does not have capability %(cap)s', {
                    'node': _utils.log_res(node),
                    'cap': key
                })
                return False
            else:
                self._counter["%s=%s" % (key, node_value)] += 1
                if value != node_value:
                    LOG.debug(
                        'Node %(node)s has capability %(cap)s of '
                        'value "%(node_val)s" instead of "%(expected)s"', {
                            'node': _utils.log_res(node),
                            'cap': key,
                            'node_val': node_value,
                            'expected': value
                        })
                    return False

        return True
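
In effect this is a dict-against-dict comparison over parsed node capabilities. A small sketch under stated assumptions: the class name and constructor below are hypothetical, and _utils.get_capabilities is assumed to parse Ironic's usual "key1:value1,key2:value2" capabilities string into a dict:

# Hypothetical names; only illustrates the matching behaviour shown above.
node.properties['capabilities'] = 'boot_mode:uefi,boot_option:local'
matches = CapabilitiesFilter({'boot_mode': 'uefi'})(node)    # True
mismatch = CapabilitiesFilter({'boot_mode': 'bios'})(node)   # False, mismatch logged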
Example #6
    def unprovision_node(self, node, wait=None):
        """Unprovision a previously provisioned node.

        :param node: `Node` object, :py:class:`metalsmith.Instance`,
            hostname, UUID or node name.
        :param wait: How many seconds to wait for the process to finish,
            None to return immediately.
        :return: the latest `Node` object.
        :raises: :py:class:`metalsmith.exceptions.DeploymentFailed`
            if undeployment fails.
        :raises: :py:class:`metalsmith.exceptions.DeploymentTimeout`
            if undeployment times out.
        :raises: :py:class:`metalsmith.exceptions.InstanceNotFound`
            if requested node cannot be found.
        """
        node = self._find_node_and_allocation(node)[0]
        if self._dry_run:
            LOG.warning("Dry run, not unprovisioning")
            return

        self._clean_up(node, remove_instance_info=False)
        try:
            node = self.connection.baremetal.set_node_provision_state(
                node, 'deleted', wait=False)

            LOG.info('Deleting started for node %s', _utils.log_res(node))

            if wait is None:
                return node

            node = self.connection.baremetal.wait_for_nodes_provision_state(
                [node], 'available', timeout=wait)[0]
        except os_exc.ResourceTimeout as exc:
            raise exceptions.DeploymentTimeout(str(exc))
        except os_exc.SDKException as exc:
            raise exceptions.DeploymentFailed(str(exc))

        LOG.info('Node %s undeployed successfully', _utils.log_res(node))
        return node
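
A minimal usage sketch for unprovision_node, assuming provisioner is an already-configured metalsmith provisioner:

# Tear down by hostname and wait up to 5 minutes for the node to become
# available again; DeploymentTimeout is raised if it does not finish in time.
node = provisioner.unprovision_node('compute-1', wait=300)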
Example #7
def detach_and_delete_ports(connection, node, created_ports, attached_ports):
    """Detach attached port and delete previously created ones.

    :param connection: `openstacksdk.Connection` instance.
    :param node: `Node` object to detach ports from.
    :param created_ports: List of IDs of previously created ports.
    :param attached_ports: List of IDs of previously attached ports.
    """
    for port_id in set(attached_ports + created_ports):
        LOG.debug('Detaching port %(port)s from node %(node)s', {
            'port': port_id,
            'node': _utils.log_res(node)
        })
        try:
            connection.baremetal.detach_vif_from_node(node, port_id)
        except Exception as exc:
            LOG.debug(
                'Failed to remove VIF %(vif)s from node %(node)s, '
                'assuming already removed: %(exc)s', {
                    'vif': port_id,
                    'node': _utils.log_res(node),
                    'exc': exc
                })

    for port_id in created_ports:
        LOG.debug('Deleting port %s', port_id)
        try:
            connection.network.delete_port(port_id, ignore_missing=False)
        except Exception as exc:
            LOG.warning('Failed to delete neutron port %(port)s: %(exc)s', {
                'port': port_id,
                'exc': exc
            })
        else:
            LOG.info('Deleted port %(port)s for node %(node)s', {
                'port': port_id,
                'node': _utils.log_res(node)
            })
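
A sketch of invoking this helper the same way Example #9 does, using the port IDs stored in the node's extra field under the module's _CREATED_PORTS and _ATTACHED_PORTS keys:

# Mirrors the cleanup path in Example #9.
created = node.extra.get(_CREATED_PORTS, [])
attached = node.extra.get(_ATTACHED_PORTS, [])
detach_and_delete_ports(connection, node, created, attached)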
Example #8
    def _patch_reserved_node(self, node, allocation, hostname, capabilities):
        """Make required updates on a newly reserved node."""
        if capabilities:
            patch = [{
                'path': '/instance_info/capabilities',
                'op': 'add',
                'value': capabilities
            }]
            LOG.debug('Patching reserved node %(node)s with %(patch)s', {
                'node': _utils.log_res(node),
                'patch': patch
            })
            return self.connection.baremetal.patch_node(node, patch)
        else:
            return node
Example #9
    def _clean_up(self, node, nics=None, remove_instance_info=True):
        if nics is None:
            created_ports = node.extra.get(_CREATED_PORTS, [])
            attached_ports = node.extra.get(_ATTACHED_PORTS, [])
            _nics.detach_and_delete_ports(self.connection, node, created_ports,
                                          attached_ports)
        else:
            nics.detach_and_delete_ports()

        extra = node.extra.copy()
        for item in (_CREATED_PORTS, _ATTACHED_PORTS):
            extra.pop(item, None)

        kwargs = {}
        if node.allocation_id and node.provision_state != 'active':
            # Try to remove allocation (it will fail for active nodes)
            LOG.debug(
                'Trying to remove allocation %(alloc)s for node '
                '%(node)s', {
                    'alloc': node.allocation_id,
                    'node': _utils.log_res(node)
                })
            try:
                self.connection.baremetal.delete_allocation(node.allocation_id)
            except Exception as exc:
                LOG.debug(
                    'Failed to remove allocation %(alloc)s for %(node)s:'
                    ' %(exc)s', {
                        'alloc': node.allocation_id,
                        'node': _utils.log_res(node),
                        'exc': exc
                    })
        elif not node.allocation_id:
            # Old-style reservations have to be cleared explicitly
            kwargs['instance_id'] = None

        try:
            if remove_instance_info:
                LOG.debug(
                    'Updating node %(node)s with empty instance info '
                    '(was %(iinfo)s) and extras %(extra)s', {
                        'node': _utils.log_res(node),
                        'iinfo': node.instance_info,
                        'extra': extra
                    })
                self.connection.baremetal.update_node(node,
                                                      instance_info={},
                                                      extra=extra,
                                                      **kwargs)
            else:
                LOG.debug('Updating node %(node)s with extras %(extra)s', {
                    'node': _utils.log_res(node),
                    'extra': extra
                })
                self.connection.baremetal.update_node(node,
                                                      extra=extra,
                                                      **kwargs)
        except Exception as exc:
            LOG.debug('Failed to clear node %(node)s extra: %(exc)s', {
                'node': _utils.log_res(node),
                'exc': exc
            })
Example #10
    def provision_node(self,
                       node,
                       image,
                       nics=None,
                       root_size_gb=None,
                       swap_size_mb=None,
                       config=None,
                       hostname=None,
                       netboot=False,
                       capabilities=None,
                       traits=None,
                       wait=None,
                       clean_up_on_failure=True):
        """Provision the node with the given image.

        Example::

         provisioner.provision_node("compute-1", "centos",
                                    nics=[{"network": "private"},
                                          {"network": "external"}],
                                    root_size_gb=50,
                                    wait=3600)

        :param node: Node object, UUID or name. Will be reserved first, if
            not reserved already. Must be in the "available" state with
            maintenance mode off.
        :param image: Image source - one of :mod:`~metalsmith.sources`,
            `Image` name or UUID.
        :param nics: List of virtual NICs to attach to physical ports.
            Each item is a dict with a key describing the type of the NIC:

            * ``{"port": "<port name or ID>"}`` to use the provided pre-created
              port.
            * ``{"network": "<network name or ID>"}`` to create a port on the
              provided network. Optionally, a ``fixed_ip`` argument can be used
              to specify an IP address.
            * ``{"subnet": "<subnet name or ID>"}`` to create a port with an IP
              address from the provided subnet. The network is determined from
              the subnet.

        :param root_size_gb: The size of the root partition. By default
            the value of the local_gb property is used.
        :param swap_size_mb: The size of the swap partition. It's an error
            to specify it for a whole disk image.
        :param config: configuration to pass to the instance, one of
            objects from :py:mod:`metalsmith.instance_config`.
        :param hostname: Hostname to assign to the instance. If provided,
            overrides the ``hostname`` passed to ``reserve_node``.
        :param netboot: Whether to use network boot for the final instance.
        :param capabilities: Requested capabilities of the node. If present,
            overwrites the capabilities set by :meth:`reserve_node`.
            Note that the capabilities are not checked against the ones
            provided by the node - use :meth:`reserve_node` for that.
        :param traits: Requested traits of the node. If present, overwrites
            the traits set by :meth:`reserve_node`. Note that the traits are
            not checked against the ones provided by the node - use
            :meth:`reserve_node` for that.
        :param wait: How many seconds to wait for the deployment to finish,
            None to return immediately.
        :param clean_up_on_failure: If True, then on failure the node is
            cleared of instance information, VIFs are detached, created ports
            and allocations are deleted.
        :return: :py:class:`metalsmith.Instance` object with the current
            status of provisioning. If ``wait`` is not ``None``, provisioning
            is already finished.
        :raises: :py:class:`metalsmith.exceptions.Error`
        """
        if config is None:
            config = instance_config.GenericConfig()
        if isinstance(image, str):
            image = sources.GlanceImage(image)

        _utils.check_hostname(hostname)

        try:
            node = self._get_node(node)
        except Exception as exc:
            raise exceptions.InvalidNode('Cannot find node %(node)s: %(exc)s' %
                                         {
                                             'node': node,
                                             'exc': exc
                                         })

        node, allocation = self._check_node_for_deploy(node, hostname)
        nics = _nics.NICs(self.connection,
                          node,
                          nics,
                          hostname=allocation and allocation.name or None)

        try:
            root_size_gb = _utils.get_root_disk(root_size_gb, node)

            image._validate(self.connection, root_size_gb)

            nics.validate()

            if capabilities is None:
                capabilities = node.instance_info.get('capabilities') or {}

            if self._dry_run:
                LOG.warning('Dry run, not provisioning node %s',
                            _utils.log_res(node))
                return node

            nics.create_and_attach_ports()

            capabilities['boot_option'] = 'netboot' if netboot else 'local'

            instance_info = self._clean_instance_info(node.instance_info)
            if root_size_gb is not None:
                instance_info['root_gb'] = root_size_gb
            instance_info['capabilities'] = capabilities
            if hostname:
                instance_info['display_name'] = hostname

            extra = node.extra.copy()
            extra[_CREATED_PORTS] = nics.created_ports
            extra[_ATTACHED_PORTS] = nics.attached_ports
            instance_info.update(image._node_updates(self.connection))
            if traits is not None:
                instance_info['traits'] = traits
            if swap_size_mb is not None:
                instance_info['swap_mb'] = swap_size_mb

            LOG.debug(
                'Updating node %(node)s with instance info %(iinfo)s '
                'and extras %(extra)s', {
                    'node': _utils.log_res(node),
                    'iinfo': instance_info,
                    'extra': extra
                })
            node = self.connection.baremetal.update_node(
                node, instance_info=instance_info, extra=extra)
            self.connection.baremetal.validate_node(node)

            network_data = _network_metadata.create_network_metadata(
                self.connection, node.extra.get(_ATTACHED_PORTS))

            LOG.debug('Generating a configdrive for node %s',
                      _utils.log_res(node))
            cd = config.generate(node, _utils.hostname_for(node, allocation),
                                 network_data)
            LOG.debug('Starting provisioning of node %s', _utils.log_res(node))
            self.connection.baremetal.set_node_provision_state(node,
                                                               'active',
                                                               config_drive=cd)
        except Exception:
            with _utils.reraise_os_exc(
                    exceptions.DeploymentFailed) as expected:
                if clean_up_on_failure:
                    LOG.error('Deploy attempt failed on node %s, cleaning up',
                              _utils.log_res(node),
                              exc_info=not expected)
                    self._clean_up(node, nics=nics)

        LOG.info('Provisioning started on node %s', _utils.log_res(node))

        if wait is not None:
            LOG.debug(
                'Waiting for node %(node)s to reach state active '
                'with timeout %(timeout)s', {
                    'node': _utils.log_res(node),
                    'timeout': wait
                })
            instance = self.wait_for_provisioning([node], timeout=wait)[0]
            LOG.info('Deploy succeeded on node %s', _utils.log_res(node))
        else:
            # Update the node to return its latest state
            node = self.connection.baremetal.get_node(node.id)
            instance = _instance.Instance(self.connection, node, allocation)

        return instance
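
For completeness, a sketch of a provision_node call that exercises the documented nic forms and optional parameters; all values are placeholders:

# Placeholder values; uses only parameters documented in the docstring above.
instance = provisioner.provision_node(
    'compute-1', 'ubuntu-22.04',
    nics=[{'port': '<pre-created-port-uuid>'},
          {'subnet': 'provisioning-subnet'}],
    hostname='web-0',
    swap_size_mb=4096,
    wait=1800)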
Example #11
    def _check_node_for_deploy(self, node, hostname):
        """Check that node is ready and reserve it if needed.

        These checks are done outside of the try..except block in
        ``provision_node``, so that we don't touch nodes that fail it at all.
        Particularly, we don't want to try clean up nodes that were not
        reserved by us or are in maintenance mode.
        """
        if node.is_maintenance:
            raise exceptions.InvalidNode('Refusing to deploy on node %(node)s '
                                         'which is in maintenance mode due to '
                                         '%(reason)s' % {
                                             'node': _utils.log_res(node),
                                             'reason': node.maintenance_reason
                                         })

        allocation = None

        # Make sure the hostname does not correspond to an existing allocation
        # for another node.
        if hostname is not None:
            allocation = self._check_allocation_for_hostname(node, hostname)

        if node.allocation_id:
            if allocation is None:
                # Previously created allocation, verify/update it
                allocation = self._check_and_update_allocation_for_node(
                    node, hostname)
        elif node.instance_id:
            # Old-style reservations with instance_uuid==node.uuid
            if node.instance_id != node.id:
                raise exceptions.InvalidNode(
                    'Node %(node)s already reserved by instance %(inst)s '
                    'outside of metalsmith, cannot deploy on it' % {
                        'node': _utils.log_res(node),
                        'inst': node.instance_id
                    })
            elif hostname:
                # We have no way to update hostname without allocations
                raise exceptions.InvalidNode(
                    'Node %s does not use allocations, cannot update '
                    'hostname for it' % _utils.log_res(node))
        else:
            # Node is not reserved at all - reserve it
            if not node.resource_class:
                raise exceptions.InvalidNode(
                    'Cannot create an allocation for node %s that '
                    'does not have a resource class set' %
                    _utils.log_res(node))

            if not self._dry_run:
                if not hostname:
                    hostname = _utils.default_hostname(node)
                LOG.debug(
                    'Node %(node)s is not reserved yet, reserving for '
                    'hostname %(host)s', {
                        'node': _utils.log_res(node),
                        'host': hostname
                    })
                # Not updating instance_info since it will be updated later
                node, allocation = self._reserve_node(
                    node.resource_class,
                    hostname=hostname,
                    candidates=[node.id],
                    update_instance_info=False)

        return node, allocation
Example #12
    def _reserve_node(self,
                      resource_class,
                      hostname=None,
                      candidates=None,
                      traits=None,
                      capabilities=None,
                      update_instance_info=True):
        """Create an allocation with given parameters."""
        if candidates:
            candidates = [(node.id if not isinstance(node, str) else node)
                          for node in candidates]

        LOG.debug(
            'Creating an allocation for resource class %(rsc)s '
            'with traits %(traits)s and candidate nodes %(candidates)s', {
                'rsc': resource_class,
                'traits': traits,
                'candidates': candidates
            })
        try:
            allocation = self.connection.baremetal.create_allocation(
                name=hostname,
                candidate_nodes=candidates,
                resource_class=resource_class,
                traits=traits)
        except os_exc.SDKException as exc:
            # Re-raise the expected exception class
            raise exceptions.ReservationFailed(
                'Failed to create an allocation: %s' % exc)

        node = None
        try:
            try:
                allocation = self.connection.baremetal.wait_for_allocation(
                    allocation)
            except os_exc.SDKException as exc:
                # Re-raise the expected exception class
                raise exceptions.ReservationFailed(
                    'Failed to reserve a node: %s' % exc)

            LOG.info('Successful allocation %(alloc)s for host %(host)s', {
                'alloc': allocation,
                'host': hostname
            })
            node = self.connection.baremetal.get_node(allocation.node_id)

            if update_instance_info:
                node = self._patch_reserved_node(node, allocation, hostname,
                                                 capabilities)
        except Exception as exc:
            with _utils.reraise_os_exc(
                    exceptions.ReservationFailed,
                    'Failed to delete failed allocation') as expected:
                LOG.error(
                    'Processing allocation %(alloc)s for node %(node)s '
                    'failed: %(exc)s; deleting allocation', {
                        'alloc': _utils.log_res(allocation),
                        'node': _utils.log_res(node),
                        'exc': exc
                    },
                    exc_info=not expected)
                self.connection.baremetal.delete_allocation(allocation)

        LOG.debug('Reserved node: %s', node)
        return node, allocation
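
Stripped of its error handling, the allocation-based reservation above amounts to three openstacksdk calls; a happy-path sketch with placeholder values:

# Sketch of the flow implemented above, without cleanup on failure.
allocation = connection.baremetal.create_allocation(
    name='compute-1', resource_class='baremetal',
    candidate_nodes=None, traits=None)
allocation = connection.baremetal.wait_for_allocation(allocation)
node = connection.baremetal.get_node(allocation.node_id)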