Example #1
def fetch_image_stream_optimized(context, instance, session, vm_name,
                                 ds_name, vm_folder_ref, res_pool_ref):
    """Fetch image from Glance to ESX datastore."""
    image_ref = instance.image_ref
    LOG.debug("Downloading image file data %(image_ref)s to the ESX "
              "as VM named '%(vm_name)s'",
              {'image_ref': image_ref, 'vm_name': vm_name},
              instance=instance)

    metadata = IMAGE_API.get(context, image_ref)
    file_size = int(metadata['size'])

    vm_import_spec = _build_import_spec_for_import_vapp(
            session, vm_name, ds_name)

    read_iter = IMAGE_API.download(context, image_ref)
    read_handle = rw_handles.ImageReadHandle(read_iter)

    write_handle = rw_handles.VmdkWriteHandle(session,
                                              session._host,
                                              session._port,
                                              res_pool_ref,
                                              vm_folder_ref,
                                              vm_import_spec,
                                              file_size)
    image_transfer(read_handle, write_handle)

    imported_vm_ref = write_handle.get_imported_vm()

    LOG.info(_LI("Downloaded image file data %(image_ref)s"),
             {'image_ref': instance.image_ref}, instance=instance)
    vmdk = vm_util.get_vmdk_info(session, imported_vm_ref, vm_name)
    session._call_method(session.vim, "UnregisterVM", imported_vm_ref)
    LOG.info(_LI("The imported VM was unregistered"), instance=instance)
    return vmdk.capacity_in_bytes
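Most of the examples in this collection assume the same module-level logging and translation-marker setup. Below is a minimal sketch of that boilerplate, assuming the Nova-era conventions of oslo_log plus the nova.i18n marker functions; exact import paths can differ between releases.

from oslo_log import log as logging

from nova.i18n import _, _LE, _LI, _LW

LOG = logging.getLogger(__name__)

# _LI/_LW/_LE mark a message for translation at info/warning/error level.
# Interpolation arguments are passed to the logger separately, so the string
# is only formatted (and translated) if the record is actually emitted, e.g.:
#
#   LOG.info(_LI("Downloaded image file data %(image_ref)s"),
#            {'image_ref': image_ref}, instance=instance)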
Example #2
    def _power_on_vm(self, instance, vm_ref):
        LOG.info(_LI("Powering on the VM: %s."), instance)
        power_on_task = self._session._call_method(self._session.vim,
                                                   "PowerOnVM_Task", vm_ref)

        self._session._wait_for_task(power_on_task)
        LOG.info(_LI("Powered on the VM: %s."), instance)
Example #3
    def _remove_base_file(self, base_file):
        """Remove a single base file if it is old enough.

        Returns nothing.
        """
        if not os.path.exists(base_file):
            LOG.debug('Cannot remove %s, it does not exist',
                      base_file)
            return

        mtime = os.path.getmtime(base_file)
        age = time.time() - mtime

        maxage = CONF.libvirt.remove_unused_resized_minimum_age_seconds
        if base_file in self.originals:
            maxage = CONF.remove_unused_original_minimum_age_seconds

        if age < maxage:
            LOG.info(_LI('Base file too young to remove: %s'),
                     base_file)
        else:
            LOG.info(_LI('Removing base file: %s'), base_file)
            try:
                os.remove(base_file)
                signature = get_info_filename(base_file)
                if os.path.exists(signature):
                    os.remove(signature)
            except OSError as e:
                LOG.error(_LE('Failed to remove %(base_file)s, '
                              'error was %(error)s'),
                          {'base_file': base_file,
                           'error': e})
Example #4
    def handshake(self, req, connect_info, sockets):
        """Execute hypervisor-specific vnc auth handshaking (if needed)."""
        host = connect_info['host']
        port = int(connect_info['port'])

        server = eventlet.connect((host, port))

        # Handshake as necessary
        if connect_info.get('internal_access_path'):
            server.sendall("CONNECT %s HTTP/1.1\r\n\r\n" %
                        connect_info['internal_access_path'])

            data = ""
            while True:
                b = server.recv(1)
                if b:
                    data += b
                    if data.find("\r\n\r\n") != -1:
                        if not data.split("\r\n")[0].find("200"):
                            LOG.info(_LI("Error in handshake format: %s"),
                                     data)
                            return
                        break

                if not b or len(data) > 4096:
                    LOG.info(_LI("Error in handshake: %s"), data)
                    return

        client = req.environ['eventlet.input'].get_socket()
        client.sendall("HTTP/1.1 200 OK\r\n\r\n")
        sockets['client'] = client
        sockets['server'] = server
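The status-line check above is easy to misread because str.find() returns an index (-1 when the substring is absent), not a boolean. Sketched below, with an illustrative helper name that is not part of the driver, is a clearer way to express what the check presumably intends: verifying that the first line of the proxy response reports HTTP 200.

def _proxy_connect_ok(data):
    """Return True if the first response line reports HTTP 200 (illustrative)."""
    status_line = data.split("\r\n")[0]  # e.g. "HTTP/1.1 200 Connection established"
    return "200" in status_line


print(_proxy_connect_ok("HTTP/1.1 200 Connection established\r\n\r\n"))  # True
print(_proxy_connect_ok("HTTP/1.1 403 Forbidden\r\n\r\n"))               # False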
Example #5
def find_guest_agent(base_dir):
    """tries to locate a guest agent at the path
    specified by agent_rel_path
    """
    if CONF.xenserver.disable_agent:
        return False

    agent_rel_path = CONF.xenserver.agent_path
    agent_path = os.path.join(base_dir, agent_rel_path)
    if os.path.isfile(agent_path):
        # The presence of the guest agent
        # file indicates that this instance can
        # reconfigure the network from xenstore data,
        # so manipulation of files in /etc is not
        # required
        LOG.info(_LI('XenServer tools installed in this '
                     'image are capable of network injection.  '
                     'Networking files will not be '
                     'manipulated'))
        return True
    xe_daemon_filename = os.path.join(base_dir,
        'usr', 'sbin', 'xe-daemon')
    if os.path.isfile(xe_daemon_filename):
        LOG.info(_LI('XenServer tools are present '
                     'in this image but are not capable '
                     'of network injection'))
    else:
        LOG.info(_LI('XenServer tools are not '
                     'installed in this image'))
    return False
Example #6
    def _log_and_attach(bdm):
        context = attach_args[0]
        instance = attach_args[1]
        if bdm.get('volume_id'):
            LOG.info(_LI('Booting with volume %(volume_id)s at '
                         '%(mountpoint)s'),
                     {'volume_id': bdm.volume_id,
                      'mountpoint': bdm['mount_device']},
                     context=context, instance=instance)
        elif bdm.get('snapshot_id'):
            LOG.info(_LI('Booting with volume snapshot %(snapshot_id)s at '
                         '%(mountpoint)s'),
                     {'snapshot_id': bdm.snapshot_id,
                      'mountpoint': bdm['mount_device']},
                     context=context, instance=instance)
        elif bdm.get('image_id'):
            LOG.info(_LI('Booting with volume-backed-image %(image_id)s at '
                         '%(mountpoint)s'),
                     {'image_id': bdm.image_id,
                      'mountpoint': bdm['mount_device']},
                     context=context, instance=instance)
        else:
            LOG.info(_LI('Booting with blank volume at %(mountpoint)s'),
                     {'mountpoint': bdm['mount_device']},
                     context=context, instance=instance)

        bdm.attach(*attach_args, **attach_kwargs)
Example #7
    def _report_final_resource_view(self):
        """Report the final calculation of physical memory, used virtual
        memory, disk, usable vCPUs, used virtual CPUs and PCI devices,
        including instance calculations and in-progress resource claims. These
        values will be exposed via the compute node table to the scheduler.
        """
        vcpus = self.compute_node.vcpus
        if vcpus:
            tcpu = vcpus
            ucpu = self.compute_node.vcpus_used
            LOG.info(_LI("Total usable vcpus: %(tcpu)s, "
                         "total allocated vcpus: %(ucpu)s"),
                     {'tcpu': vcpus,
                      'ucpu': ucpu})
        else:
            tcpu = 0
            ucpu = 0
        pci_stats = self.compute_node.pci_device_pools
        LOG.info(_LI("Final resource view: "
                     "name=%(node)s "
                     "phys_ram=%(phys_ram)sMB "
                     "used_ram=%(used_ram)sMB "
                     "phys_disk=%(phys_disk)sGB "
                     "used_disk=%(used_disk)sGB "
                     "total_vcpus=%(total_vcpus)s "
                     "used_vcpus=%(used_vcpus)s "
                     "pci_stats=%(pci_stats)s"),
                 {'node': self.nodename,
                  'phys_ram': self.compute_node.memory_mb,
                  'used_ram': self.compute_node.memory_mb_used,
                  'phys_disk': self.compute_node.local_gb,
                  'used_disk': self.compute_node.local_gb_used,
                  'total_vcpus': tcpu,
                  'used_vcpus': ucpu,
                  'pci_stats': pci_stats})
Example #8
    def _get_or_upload_image_lu(self, context, img_meta):
        """Ensures our SSP has an LU containing the specified image.

        If an LU of type IMAGE corresponding to the input image metadata
        already exists in our SSP, return it.  Otherwise, create it, prime it
        with the image contents from glance, and return it.

        :param context: nova context used to retrieve image from glance
        :param img_meta: image metadata dict:
                      { 'id': reference used to locate the image in glance,
                        'size': size in bytes of the image. }
        :return: A pypowervm LU ElementWrapper representing the image.
        """
        # Key off of the name to see whether we already have the image
        luname = self._get_image_name(img_meta)
        ssp = self._ssp
        for lu in ssp.logical_units:
            if lu.lu_type == pvm_stg.LUType.IMAGE and lu.name == luname:
                LOG.info(_LI('SSP: Using already-uploaded image LU %s.'),
                         luname)
                return lu

        # We don't have it yet.  Create it and upload the glance image to it.
        # Make the image LU only as big as the image.
        stream = self._get_image_upload(context, img_meta)
        LOG.info(_LI('SSP: Uploading new image LU %s.'), luname)
        lu, f_wrap = tsk_stg.upload_new_lu(self._any_vios_uuid(), ssp, stream,
                                           luname, img_meta['size'])
        return lu
Example #9
    def update_available_resource(self, context):
        """Override in-memory calculations of compute node resource usage based
        on data audited from the hypervisor layer.

        Add in resource claims in progress to account for operations that have
        declared a need for resources, but not necessarily retrieved them from
        the hypervisor layer yet.
        """
        LOG.info(_LI("Auditing locally available compute resources for "
                     "node %(node)s"),
                 {'node': self.nodename})
        resources = self.driver.get_available_resource(self.nodename)

        if not resources:
            # The virt driver does not support this function
            LOG.info(_LI("Virt driver does not support "
                 "'get_available_resource'. Compute tracking is disabled."))
            self.compute_node = None
            return
        resources['host_ip'] = CONF.my_ip

        # We want the 'cpu_info' to be None from the POV of the
        # virt driver, but the DB requires it to be non-null so
        # just force it to empty string
        if ("cpu_info" not in resources or
            resources["cpu_info"] is None):
            resources["cpu_info"] = ''

        self._verify_resources(resources)

        self._report_hypervisor_resource_view(resources)

        self._update_available_resource(context, resources)
Example #10
    def _create_resource_class(self, name):
        """Calls the placement API to create a new resource class.

        :param name: String name of the resource class to create.

        :returns: None on successful creation.
        :raises: `exception.InvalidResourceClass` upon error.
        """
        url = "/resource_classes"
        payload = {
            'name': name,
        }
        resp = self.post(url, payload, version="1.2")
        if 200 <= resp.status_code < 300:
            msg = _LI("Created resource class record via placement API "
                      "for resource class %s.")
            LOG.info(msg, name)
        elif resp.status_code == 409:
            # Another thread concurrently created a resource class with the
            # same name. Log a warning and then just return
            msg = _LI("Another thread already created a resource class "
                      "with the name %s. Returning.")
            LOG.info(msg, name)
        else:
            msg = _LE("Failed to create resource class %(resource_class)s in "
                      "placement API. Got %(status_code)d: %(err_text)s.")
            args = {
                'resource_class': name,
                'status_code': resp.status_code,
                'err_text': resp.text,
            }
            LOG.error(msg, args)
            raise exception.InvalidResourceClass(resource_class=name)
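The branching above follows a common pattern for placement API calls: any 2xx is success, a 409 means another worker created the record first and is treated as success too, and anything else is an error. A toy, dependency-free illustration of that classification (hypothetical names, runnable without a placement service):

class FakeResponse(object):
    """Stand-in for the requests-style response object used above."""
    def __init__(self, status_code, text=''):
        self.status_code = status_code
        self.text = text


def classify(resp):
    if 200 <= resp.status_code < 300:
        return 'created'
    elif resp.status_code == 409:
        return 'already exists (concurrent create)'
    return 'error'


for code in (201, 409, 400):
    print('%d -> %s' % (code, classify(FakeResponse(code))))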
Example #11
    def _test(self, type_, unit, total, used, requested, limit):
        """Test if the given type of resource needed for a claim can be safely
        allocated.
        """
        LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f '
                    '%(unit)s'),
                  {'type': type_, 'total': total, 'unit': unit, 'used': used},
                  instance=self.instance)

        if limit is None:
            # treat resource as unlimited:
            LOG.info(_LI('%(type)s limit not specified, defaulting to '
                        'unlimited'), {'type': type_}, instance=self.instance)
            return

        free = limit - used

        # Oversubscribed resource policy info:
        LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, '
                     'free: %(free).02f %(unit)s'),
                  {'type': type_, 'limit': limit, 'free': free, 'unit': unit},
                  instance=self.instance)

        if requested > free:
            return (_('Free %(type)s %(free).02f '
                      '%(unit)s < requested %(requested)d %(unit)s') %
                      {'type': type_, 'free': free, 'unit': unit,
                       'requested': requested})
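_test() reports its result through the return value: None means the claim fits, while a string is a human-readable rejection reason for the caller to aggregate. A stripped-down, standalone sketch of that contract (illustrative, not Nova's actual claim code):

def check_fits(requested, used, limit):
    """Return None if the request fits under the limit, else a reason string."""
    if limit is None:
        return None                      # treat resource as unlimited
    free = limit - used
    if requested > free:
        return 'free %.2f < requested %d' % (free, requested)
    return None


reasons = [r for r in (check_fits(512, 1024, 2048),
                       check_fits(4096, 1024, 2048)) if r]
print('; '.join(reasons) if reasons else 'claim fits')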
Example #12
    def _sync_compute_node(self, context, resources):
        """Create or update the compute node DB record."""
        if not self.compute_node:
            # we need a copy of the ComputeNode record:
            service = self._get_service(context)
            if not service:
                # no service record, disable resource
                return

            compute_node_refs = service['compute_node']
            if compute_node_refs:
                for cn in compute_node_refs:
                    if cn.get('hypervisor_hostname') == self.nodename:
                        self.compute_node = cn
                        if self.pci_tracker:
                            self.pci_tracker.set_compute_node_id(cn['id'])
                        break

        if not self.compute_node:
            # Need to create the ComputeNode record:
            resources['service_id'] = service['id']
            self._create(context, resources)
            if self.pci_tracker:
                self.pci_tracker.set_compute_node_id(self.compute_node['id'])
            LOG.info(_LI('Compute_service record created for '
                         '%(host)s:%(node)s'),
                     {'host': self.host, 'node': self.nodename})

        else:
            # just update the record:
            self._update(context, resources)
            LOG.info(_LI('Compute_service record updated for '
                         '%(host)s:%(node)s'),
                     {'host': self.host, 'node': self.nodename})
Example #13
    def __exit__(self, ex_type, ex_value, ex_traceback):
        if not ex_value:
            return True

        if isinstance(ex_value, exception.Forbidden):
            raise Fault(webob.exc.HTTPForbidden(
                    explanation=ex_value.format_message()))
        elif isinstance(ex_value, exception.VersionNotFoundForAPIMethod):
            raise
        elif isinstance(ex_value, exception.Invalid):
            raise Fault(exception.ConvertedException(
                    code=ex_value.code,
                    explanation=ex_value.format_message()))
        elif isinstance(ex_value, TypeError):
            exc_info = (ex_type, ex_value, ex_traceback)
            LOG.error(_LE('Exception handling resource: %s'), ex_value,
                      exc_info=exc_info)
            raise Fault(webob.exc.HTTPBadRequest())
        elif isinstance(ex_value, Fault):
            LOG.info(_LI("Fault thrown: %s"), ex_value)
            raise ex_value
        elif isinstance(ex_value, webob.exc.HTTPException):
            LOG.info(_LI("HTTP exception thrown: %s"), ex_value)
            raise Fault(ex_value)

        # We didn't handle the exception
        return False
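This __exit__ relies on the context-manager contract: returning True tells Python the exception was handled, while returning False (the final branch above) lets it propagate. A minimal, runnable illustration of that behaviour:

class SwallowKeyError(object):
    def __enter__(self):
        return self

    def __exit__(self, ex_type, ex_value, ex_traceback):
        # True suppresses the exception, False re-raises it.
        return ex_value is None or isinstance(ex_value, KeyError)


with SwallowKeyError():
    raise KeyError('suppressed')
print('KeyError was swallowed; a ValueError would have propagated')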
Example #14
    def _test(self, type_, unit, total, used, requested, limit):
        """Test if the given type of resource needed for a claim can be safely
        allocated.
        """
        LOG.info(
            _LI("Total %(type)s: %(total)d %(unit)s, used: %(used).02f " "%(unit)s"),
            {"type": type_, "total": total, "unit": unit, "used": used},
            instance=self.instance,
        )

        if limit is None:
            # treat resource as unlimited:
            LOG.info(
                _LI("%(type)s limit not specified, defaulting to " "unlimited"), {"type": type_}, instance=self.instance
            )
            return

        free = limit - used

        # Oversubscribed resource policy info:
        LOG.info(
            _LI("%(type)s limit: %(limit).02f %(unit)s, " "free: %(free).02f %(unit)s"),
            {"type": type_, "limit": limit, "free": free, "unit": unit},
            instance=self.instance,
        )

        if requested > free:
            return _("Free %(type)s %(free).02f " "%(unit)s < requested %(requested)d %(unit)s") % {
                "type": type_,
                "free": free,
                "unit": unit,
                "requested": requested,
            }
Example #15
    def __exit__(self, ex_type, ex_value, ex_traceback):
        if not ex_value:
            return True

        if isinstance(ex_value, exception.Forbidden):
            raise Fault(webob.exc.HTTPForbidden(
                    explanation=ex_value.format_message()))
        elif isinstance(ex_value, exception.Invalid):
            raise Fault(exception.ConvertedException(
                    code=ex_value.code,
                    explanation=ex_value.format_message()))

        # Under python 2.6, TypeError's exception value is actually a string,
        # so test here via ex_type instead:
        # http://bugs.python.org/issue7853
        elif issubclass(ex_type, TypeError):
            exc_info = (ex_type, ex_value, ex_traceback)
            LOG.error(_LE('Exception handling resource: %s'), ex_value,
                      exc_info=exc_info)
            raise Fault(webob.exc.HTTPBadRequest())
        elif isinstance(ex_value, Fault):
            LOG.info(_LI("Fault thrown: %s"), unicode(ex_value))
            raise ex_value
        elif isinstance(ex_value, webob.exc.HTTPException):
            LOG.info(_LI("HTTP exception thrown: %s"), unicode(ex_value))
            raise Fault(ex_value)

        # We didn't handle the exception
        return False
Example #16
    def sync_instance_info(self, context, host_name, instance_uuids):
        """Receives the uuids of the instances on a host.

        This method is periodically called by the compute nodes, which send a
        list of all the UUID values for the instances on that node. This is
        used by the scheduler's HostManager to detect when its view of the
        compute node's instances is out of sync.
        """
        host_info = self._instance_info.get(host_name)
        if host_info:
            local_set = set(host_info["instances"].keys())
            compute_set = set(instance_uuids)
            if not local_set == compute_set:
                self._recreate_instance_info(context, host_name)
                LOG.info(
                    _LI("The instance sync for host '%s' did not match. " "Re-created its InstanceList."), host_name
                )
                return
            host_info["updated"] = True
            LOG.info(_LI("Successfully synced instances from host '%s'."), host_name)
        else:
            self._recreate_instance_info(context, host_name)
            LOG.info(
                _LI("Received a sync request from an unknown host '%s'. " "Re-created its InstanceList."), host_name
            )
Example #17
    def setup_basic_filtering(self, instance, network_info):
        """Set up basic filtering (MAC, IP, and ARP spoofing protection)."""
        LOG.info(_LI('Called setup_basic_filtering in nwfilter'),
                 instance=instance)

        if self.handle_security_groups:
            # No point in setting up a filter set that we'll be overriding
            # anyway.
            return

        LOG.info(_LI('Ensuring static filters'), instance=instance)
        self._ensure_static_filters()

        nodhcp_base_filter = self.get_base_filter_list(instance, False)
        dhcp_base_filter = self.get_base_filter_list(instance, True)

        for vif in network_info:
            _base_filter = nodhcp_base_filter
            for subnet in vif['network']['subnets']:
                if subnet.get_meta('dhcp_server'):
                    _base_filter = dhcp_base_filter
                    break
            self._define_filter(self._get_instance_filter_xml(instance,
                                                              _base_filter,
                                                              vif))
Example #18
def soft_shutdown(instance, timeout=0, retry_interval=0):
    """If ACPI is available for this instance the ACPI shutdown button
    will be pressed.

    :param instance:       nova.objects.instance.Instance
    :param timeout:        time to wait for GuestOS to shutdown
    :param retry_interval: how often to signal guest while waiting
                           for it to shutdown

    .. note::
        We fall back to hard reboot if instance does not shutdown
        within this window.
    """
    if timeout <= 0:
        LOG.debug("No timeout provided, assuming %d",
                  CONF.virtualbox.wait_soft_reboot_seconds)
        timeout = CONF.virtualbox.wait_soft_reboot_seconds

    if retry_interval <= 0:
        LOG.debug("No retry_interval provided, assuming %d",
                  constants.SHUTDOWN_RETRY_INTERVAL)
        retry_interval = constants.SHUTDOWN_RETRY_INTERVAL

    instance_info = manage.VBoxManage.show_vm_info(instance)
    desired_power_state = constants.STATE_POWER_OFF

    if not instance_info.get(constants.VM_ACPI, 'off') == 'on':
        return False

    LOG.debug("Performing soft shutdown on instance", instance=instance)
    while timeout > 0:
        wait_time = min(retry_interval, timeout)
        try:
            LOG.debug("Soft shutdown instance, timeout remaining: %d",
                      timeout, instance=instance)
            try:
                manage.VBoxManage.control_vm(instance,
                                             constants.ACPI_POWER_BUTTON)
            except nova_exception.InstanceInvalidState:
                if get_power_state(instance) == desired_power_state:
                    LOG.info(i18n._LI("Soft shutdown succeeded."),
                             instance=instance)
                    return True
                raise

            if wait_for_power_state(instance, desired_power_state, wait_time):
                LOG.info(i18n._LI("Soft shutdown succeeded."),
                         instance=instance)
                return True

        except exception.VBoxException as exc:
            LOG.debug("Soft shutdown failed: %s", exc, instance=instance)
            time.sleep(wait_time)

        timeout -= retry_interval

    LOG.warning(i18n._LW("Timed out while waiting for soft shutdown."),
                instance=instance)
    return False
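The shutdown loop above is an instance of a generic countdown-and-poll pattern: attempt, wait min(retry_interval, timeout), decrement, and give up once the budget is spent. A library-free sketch of just that control flow (the helper name is illustrative, not part of the VirtualBox driver API):

import time


def poll_until(predicate, timeout, retry_interval):
    """Poll predicate() until it returns True or the timeout budget is spent."""
    while timeout > 0:
        wait_time = min(retry_interval, timeout)
        if predicate():
            return True
        time.sleep(wait_time)
        timeout -= retry_interval
    return False


deadline = time.time() + 0.3
print(poll_until(lambda: time.time() >= deadline, timeout=1, retry_interval=0.1))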
Example #19
    def _update_inventory_attempt(self, rp_uuid, inv_data):
        """Update the inventory for this resource provider if needed.

        :param rp_uuid: The resource provider UUID for the operation
        :param inv_data: The new inventory for the resource provider
        :returns: True if the inventory was updated (or did not need to be),
                  False otherwise.
        """
        curr = self._get_inventory_and_update_provider_generation(rp_uuid)

        # Check to see if we need to update placement's view
        if inv_data == curr.get('inventories', {}):
            return True

        cur_rp_gen = self._resource_providers[rp_uuid].generation
        payload = {
            'resource_provider_generation': cur_rp_gen,
            'inventories': inv_data,
        }
        url = '/resource_providers/%s/inventories' % rp_uuid
        result = self.put(url, payload)
        if result.status_code == 409:
            LOG.info(_LI('Inventory update conflict for %s'),
                     rp_uuid)
            # Invalidate our cache and re-fetch the resource provider
            # to be sure to get the latest generation.
            del self._resource_providers[rp_uuid]
            # NOTE(jaypipes): We don't need to pass a name parameter to
            # _ensure_resource_provider() because we know the resource provider
            # record already exists. We're just reloading the record here.
            self._ensure_resource_provider(rp_uuid)
            return False
        elif not result:
            LOG.warning(_LW('Failed to update inventory for resource provider '
                            '%(uuid)s: %(status)i %(text)s'),
                        {'uuid': rp_uuid,
                         'status': result.status_code,
                         'text': result.text})
            return False

        if result.status_code != 200:
            LOG.info(
                _LI('Received unexpected response code %(code)i while '
                    'trying to update inventory for resource provider %(uuid)s'
                    ': %(text)s'),
                {'uuid': rp_uuid,
                 'code': result.status_code,
                 'text': result.text})
            return False

        # Update our view of the generation for next time
        updated_inventories_result = result.json()
        new_gen = updated_inventories_result['resource_provider_generation']

        self._resource_providers[rp_uuid].generation = new_gen
        LOG.debug('Updated inventory for %s at generation %i',
                  rp_uuid, new_gen)
        return True
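The 409 handling above is optimistic concurrency control: every write carries the resource provider generation the client last saw, and the server rejects the write if the generation has moved on, forcing the client to re-read before retrying. A toy in-memory version of that protocol (hypothetical names):

class Provider(object):
    def __init__(self):
        self.generation = 0
        self.inventories = {}


def put_inventory(provider, seen_generation, inventories):
    """Return an HTTP-style status code for a generation-guarded write."""
    if seen_generation != provider.generation:
        return 409                      # stale view: caller must re-read
    provider.inventories = inventories
    provider.generation += 1
    return 200


p = Provider()
print(put_inventory(p, 0, {'VCPU': 8}))   # 200; generation is now 1
print(put_inventory(p, 0, {'VCPU': 4}))   # 409; we wrote with a stale generation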
Example #20
    def _delete_inventory(self, rp_uuid):
        """Deletes all inventory records for a resource provider with the
        supplied UUID.
        """
        curr = self._get_inventory_and_update_provider_generation(rp_uuid)

        # Check to see if we need to update placement's view
        if not curr.get('inventories', {}):
            msg = "No inventory to delete from resource provider %s."
            LOG.debug(msg, rp_uuid)
            return

        msg = _LI("Compute node %s reported no inventory but previous "
                  "inventory was detected. Deleting existing inventory "
                  "records.")
        LOG.info(msg, rp_uuid)

        url = '/resource_providers/%s/inventories' % rp_uuid
        cur_rp_gen = self._resource_providers[rp_uuid].generation
        payload = {
            'resource_provider_generation': cur_rp_gen,
            'inventories': {},
        }
        r = self.put(url, payload)
        if r.status_code == 200:
            # Update our view of the generation for next time
            updated_inv = r.json()
            new_gen = updated_inv['resource_provider_generation']

            self._resource_providers[rp_uuid].generation = new_gen
            msg_args = {
                'rp_uuid': rp_uuid,
                'generation': new_gen,
            }
            LOG.info(_LI('Deleted all inventory for resource provider '
                         '%(rp_uuid)s at generation %(generation)i'),
                     msg_args)
            return
        elif r.status_code == 409:
            rc_str = _extract_inventory_in_use(r.text)
            if rc_str is not None:
                msg = _LW("We cannot delete inventory %(rc_str)s for resource "
                          "provider %(rp_uuid)s because the inventory is "
                          "in use.")
                msg_args = {
                    'rp_uuid': rp_uuid,
                    'rc_str': rc_str,
                }
                LOG.warning(msg, msg_args)
                return

        msg = _LE("Failed to delete inventory for resource provider "
                  "%(rp_uuid)s. Got error response: %(err)s")
        msg_args = {
            'rp_uuid': rp_uuid,
            'err': r.text,
        }
        LOG.error(msg, msg_args)
Example #21
    def get_filtered_objects(self, filters, objs, filter_properties, index=0):
        list_objs = list(objs)
        LOG.debug("Starting with %d host(s)", len(list_objs))
        # Track the hosts as they are removed. The 'full_filter_results' list
        # contains the host/nodename info for every host that passes each
        # filter, while the 'part_filter_results' list just tracks the number
        # removed by each filter, unless the filter returns zero hosts, in
        # which case it records the host/nodename for the last batch that was
        # removed. Since the full_filter_results can be very large, it is only
        # recorded if the LOG level is set to debug.
        part_filter_results = []
        full_filter_results = []
        log_msg = "%(cls_name)s: (start: %(start)s, end: %(end)s)"
        for filter_ in filters:
            if filter_.run_filter_for_index(index):
                cls_name = filter_.__class__.__name__
                start_count = len(list_objs)
                objs = filter_.filter_all(list_objs, filter_properties)
                if objs is None:
                    LOG.debug("Filter %s says to stop filtering", cls_name)
                    return
                list_objs = list(objs)
                end_count = len(list_objs)
                part_filter_results.append(log_msg % {"cls_name": cls_name,
                        "start": start_count, "end": end_count})
                if list_objs:
                    remaining = [(getattr(obj, "host", obj),
                                  getattr(obj, "nodename", ""))
                                 for obj in list_objs]
                    full_filter_results.append((cls_name, remaining))
                else:
                    LOG.info(_LI("Filter %s returned 0 hosts"), cls_name)
                    full_filter_results.append((cls_name, None))
                    break
                LOG.debug("Filter %(cls_name)s returned "
                          "%(obj_len)d host(s)",
                          {'cls_name': cls_name, 'obj_len': len(list_objs)})
        if not list_objs:
            # Log the filtration history
            rspec = filter_properties.get("request_spec", {})
            inst_props = rspec.get("instance_properties", {})
            msg_dict = {"res_id": inst_props.get("reservation_id", ""),
                        "inst_uuid": inst_props.get("uuid", ""),
                        "str_results": str(full_filter_results),
                        }
            full_msg = ("Filtering removed all hosts for the request with "
                        "reservation ID '%(res_id)s' and instance ID "
                        "'%(inst_uuid)s'. Filter results: %(str_results)s"
                        ) % msg_dict
            msg_dict["str_results"] = str(part_filter_results)
            part_msg = _LI("Filtering removed all hosts for the request with "
                           "reservation ID '%(res_id)s' and instance ID "
                           "'%(inst_uuid)s'. Filter results: %(str_results)s"
                           ) % msg_dict
            LOG.debug(full_msg)
            LOG.info(part_msg)
        return list_objs
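get_filtered_objects() assumes only two things of each filter object: run_filter_for_index(index) says whether the filter applies to this pass, and filter_all(objs, filter_properties) returns the surviving objects, or None to stop filtering entirely. A toy filter honouring that contract (illustrative only; real Nova filters subclass the scheduler's base filter classes):

import collections


class MinRamFilter(object):
    """Keep only hosts reporting at least 1024 MB of free RAM."""

    def run_filter_for_index(self, index):
        return True

    def filter_all(self, objs, filter_properties):
        return [obj for obj in objs if getattr(obj, 'free_ram_mb', 0) >= 1024]


Host = collections.namedtuple('Host', ['host', 'nodename', 'free_ram_mb'])
hosts = [Host('h1', 'n1', 512), Host('h2', 'n2', 2048)]
print(MinRamFilter().filter_all(hosts, {}))   # only h2 survives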
Example #22
    def _wait_and_get_portgroup_details(self, session, vm_ref,
                                        port_group_name):
        """Gets reference to the portgroup for the vm."""

        max_counts = CONF.vmware.vmwareapi_nic_attach_retry_count
        count = 0
        network_obj = {}
        LOG.info(_LI("Waiting for the portgroup %s to be created."),
                 port_group_name)
        while count < max_counts:
            host = session._call_method(vim_util, "get_dynamic_property",
                                        vm_ref, "VirtualMachine",
                                        "runtime.host")
            vm_networks_ret = session._call_method(vim_util,
                                                   "get_dynamic_property",
                                                   host, "HostSystem",
                                                   "network")
            if vm_networks_ret:
                vm_networks = vm_networks_ret.ManagedObjectReference
                for network in vm_networks:
                    # Get network properties.
                    if network._type == 'DistributedVirtualPortgroup':
                        props = session._call_method(vim_util,
                                                     "get_dynamic_property",
                                                     network,
                                                     network._type,
                                                     "config")
                        if props.name in port_group_name:
                            LOG.info(_LI("DistributedVirtualPortgroup "
                                         "created."))
                            network_obj['type'] = 'DistributedVirtualPortgroup'
                            network_obj['dvpg'] = props.key
                            dvs_props = session._call_method(
                                vim_util,
                                "get_dynamic_property",
                                props.distributedVirtualSwitch,
                                "VmwareDistributedVirtualSwitch",
                                "uuid")
                            network_obj['dvsw'] = dvs_props
                            network_obj['dvpg-name'] = props.name
                            return network_obj
                    elif network._type == 'Network':
                        netname = session._call_method(vim_util,
                                                       "get_dynamic_property",
                                                       network,
                                                       network._type,
                                                       "name")
                        if netname in port_group_name:
                            LOG.info(_LI("Standard Switch Portgroup created."))
                            network_obj['type'] = 'Network'
                            network_obj['name'] = port_group_name
                            return network_obj
                count = count + 1
                LOG.info(_LI("Portgroup not created. Retrying again "
                             "after 2 seconds."))
                greenthread.sleep(2)
        return None
Example #23
def fetch_image_ova(context, instance, session, vm_name, ds_name,
                    vm_folder_ref, res_pool_ref):
    """Download the OVA image from the glance image server to the
    Nova compute node.
    """
    image_ref = instance.image_ref
    LOG.debug("Downloading OVA image file %(image_ref)s to the ESX "
              "as VM named '%(vm_name)s'",
              {'image_ref': image_ref, 'vm_name': vm_name},
              instance=instance)

    metadata = IMAGE_API.get(context, image_ref)
    file_size = int(metadata['size'])

    vm_import_spec = _build_import_spec_for_import_vapp(
        session, vm_name, ds_name)

    read_iter = IMAGE_API.download(context, image_ref)
    read_handle = rw_handles.ImageReadHandle(read_iter)

    with tarfile.open(mode="r|", fileobj=read_handle) as tar:
        vmdk_name = None
        for tar_info in tar:
            if tar_info and tar_info.name.endswith(".ovf"):
                extracted = tar.extractfile(tar_info)
                xmlstr = extracted.read()
                vmdk_name = get_vmdk_name_from_ovf(xmlstr)
            elif vmdk_name and tar_info.name.startswith(vmdk_name):
                # Actual file name is <vmdk_name>.XXXXXXX
                extracted = tar.extractfile(tar_info)
                write_handle = rw_handles.VmdkWriteHandle(
                    session,
                    session._host,
                    session._port,
                    res_pool_ref,
                    vm_folder_ref,
                    vm_import_spec,
                    file_size)
                start_transfer(context,
                               extracted,
                               file_size,
                               write_file_handle=write_handle)
                extracted.close()
                LOG.info(_LI("Downloaded OVA image file %(image_ref)s"),
                    {'image_ref': instance.image_ref}, instance=instance)
                imported_vm_ref = write_handle.get_imported_vm()
                vmdk = vm_util.get_vmdk_info(session,
                                             imported_vm_ref,
                                             vm_name)
                session._call_method(session.vim, "UnregisterVM",
                                     imported_vm_ref)
                LOG.info(_LI("The imported VM was unregistered"),
                         instance=instance)
                return vmdk.capacity_in_bytes
        raise exception.ImageUnacceptable(
            reason=_("Extracting vmdk from OVA failed."),
            image_id=image_ref)
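fetch_image_ova() opens the tarball with mode "r|", a non-seekable forward-only stream, which is why the .ovf descriptor must appear before the disk file for the loop above to learn the VMDK name in time. A small, self-contained demonstration of streaming-mode tarfile iteration:

import io
import tarfile

# Build a tiny in-memory archive with a single .ovf member.
buf = io.BytesIO()
with tarfile.open(mode='w', fileobj=buf) as tar:
    payload = b'<Envelope/>'
    info = tarfile.TarInfo(name='image.ovf')
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))
buf.seek(0)

# "r|" reads the archive as a single forward pass, like the OVA download.
with tarfile.open(mode='r|', fileobj=buf) as tar:
    for member in tar:
        print('%s %r' % (member.name, tar.extractfile(member).read()))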
Example #24
    def _create_resource_provider(self, uuid, name):
        """Calls the placement API to create a new resource provider record.

        Returns a dict of resource provider information object representing
        the newly-created resource provider.

        :param uuid: UUID of the new resource provider
        :param name: Name of the resource provider
        """
        url = "/resource_providers"
        payload = {
            'uuid': uuid,
            'name': name,
        }
        resp = self.post(url, payload)
        placement_req_id = get_placement_request_id(resp)
        if resp.status_code == 201:
            msg = _LI("[%(placement_req_id)s] Created resource provider "
                      "record via placement API for resource provider with "
                      "UUID %(uuid)s and name %(name)s.")
            args = {
                'uuid': uuid,
                'name': name,
                'placement_req_id': placement_req_id,
            }
            LOG.info(msg, args)
            return dict(
                    uuid=uuid,
                    name=name,
                    generation=0,
            )
        elif resp.status_code == 409:
            # Another thread concurrently created a resource provider with the
            # same UUID. Log a warning and then just return the resource
            # provider object from _get_resource_provider()
            msg = _LI("[%(placement_req_id)s] Another thread already created "
                      "a resource provider with the UUID %(uuid)s. Grabbing "
                      "that record from the placement API.")
            args = {
                'uuid': uuid,
                'placement_req_id': placement_req_id,
            }
            LOG.info(msg, args)
            return self._get_resource_provider(uuid)
        else:
            msg = _LE("[%(placement_req_id)s] Failed to create resource "
                      "provider record in placement API for UUID %(uuid)s. "
                      "Got %(status_code)d: %(err_text)s.")
            args = {
                'uuid': uuid,
                'status_code': resp.status_code,
                'err_text': resp.text,
                'placement_req_id': placement_req_id,
            }
            LOG.error(msg, args)
Example #25
    def execute(self, lpar_wrap):
        LOG.info(_LI('Plugging the Network Interfaces to instance %s'),
                 self.instance.name)

        # Get the current adapters on the system
        cna_w_list = vm.get_cnas(self.adapter, self.instance, self.host_uuid)

        # Trim the VIFs down to the ones that haven't yet been created.
        crt_vifs = []
        for vif in self.network_info:
            for cna_w in cna_w_list:
                if vm.norm_mac(cna_w.mac) == vif['address']:
                    break
            else:
                crt_vifs.append(vif)

        # If there are no vifs to create, then just exit immediately.
        if len(crt_vifs) == 0:
            return []

        # Check to see if the LPAR is OK to add VIFs to.
        modifiable, reason = lpar_wrap.can_modify_io()
        if not modifiable:
            LOG.error(_LE('Unable to create VIF(s) for instance %(sys)s.  The '
                          'VM was in a state where VIF plugging is not '
                          'acceptable.  The reason from the system is: '
                          '%(reason)s'),
                      {'sys': self.instance.name, 'reason': reason},
                      instance=self.instance)
            raise exception.VirtualInterfaceCreateException()

        # For the VIFs, run the creates (and wait for the events back)
        try:
            with self.virt_api.wait_for_instance_event(
                    self.instance, self._get_vif_events(),
                    deadline=CONF.vif_plugging_timeout,
                    error_callback=self._vif_callback_failed):
                for vif in crt_vifs:
                    LOG.info(_LI('Creating VIF with mac %(mac)s for instance '
                                 '%(sys)s'),
                             {'mac': vif['address'],
                              'sys': self.instance.name},
                             instance=self.instance)
                    vm.crt_vif(self.adapter, self.instance, self.host_uuid,
                               vif)
        except eventlet.timeout.Timeout:
            LOG.error(_LE('Error waiting for VIF to be created for instance '
                          '%(sys)s'), {'sys': self.instance.name},
                      instance=self.instance)
            raise exception.VirtualInterfaceCreateException()

        # Return the list of created VIFs.
        return cna_w_list
Example #26
    def _add_maps_for_fabric(self, fabric):
        """Adds the vFC storage mappings to the VM for a given fabric.

        Will check if the Fabric is mapped to the management partition.  If it
        is, then it will remove the mappings and update the fabric state. This
        is because, in order for the WWPNs to be on the fabric (for Cinder)
        before the VM is online, the WWPNs get mapped to the management
        partition.

        This method will remove from the management partition (if needed), and
        then assign it to the instance itself.

        :param fabric: The fabric to add the mappings to.
        """
        npiv_port_maps = self._get_fabric_meta(fabric)
        vios_wraps = self.stg_ftsk.feed

        # If currently mapped to the mgmt partition, remove the mappings so
        # that they can be added to the client.
        if self._get_fabric_state(fabric) == FS_MGMT_MAPPED:
            mgmt_uuid = mgmt.get_mgmt_partition(self.adapter).uuid

            # Each port mapping should be removed from the VIOS.
            for npiv_port_map in npiv_port_maps:
                vios_w = pvm_vfcm.find_vios_for_port_map(
                    vios_wraps, npiv_port_map)
                ls = [LOG.info, _LI("Removing NPIV mapping for mgmt partition "
                                    "for instance %(inst)s on VIOS %(vios)s."),
                      {'inst': self.instance.name, 'vios': vios_w.name}]

                # Add the subtask to remove the map from the mgmt partition
                self.stg_ftsk.wrapper_tasks[vios_w.uuid].add_functor_subtask(
                    pvm_vfcm.remove_maps, mgmt_uuid, port_map=npiv_port_map,
                    logspec=ls)

        # This loop adds the maps from the appropriate VIOS to the client VM
        for npiv_port_map in npiv_port_maps:
            vios_w = pvm_vfcm.find_vios_for_port_map(vios_wraps, npiv_port_map)
            ls = [LOG.info, _LI("Adding NPIV mapping for instance %(inst)s "
                                "for Virtual I/O Server %(vios)s."),
                  {'inst': self.instance.name, 'vios': vios_w.name}]

            # Add the subtask to add the specific map.
            self.stg_ftsk.wrapper_tasks[vios_w.uuid].add_functor_subtask(
                pvm_vfcm.add_map, self.host_uuid, self.vm_uuid, npiv_port_map,
                logspec=ls)

        # After all the mappings, make sure the fabric state is updated.
        def set_state():
            self._set_fabric_state(fabric, FS_INST_MAPPED)
        volume_id = self.connection_info['data']['volume_id']
        self.stg_ftsk.add_post_execute(task.FunctorTask(
            set_state, name='fab_%s_%s' % (fabric, volume_id)))
Example #27
    def _handle_base_image(self, img_id, base_file):
        """Handle the checks for a single base image."""

        image_in_use = False

        LOG.info(_LI('image %(id)s at (%(base_file)s): checking'),
                 {'id': img_id,
                  'base_file': base_file})

        if base_file in self.unexplained_images:
            self.unexplained_images.remove(base_file)

        if img_id in self.used_images:
            local, remote, instances = self.used_images[img_id]

            if local > 0 or remote > 0:
                image_in_use = True
                LOG.info(_LI('image %(id)s at (%(base_file)s): '
                             'in use: on this node %(local)d local, '
                             '%(remote)d on other nodes sharing this instance '
                             'storage'),
                         {'id': img_id,
                          'base_file': base_file,
                          'local': local,
                          'remote': remote})

                self.active_base_files.append(base_file)

                if not base_file:
                    LOG.warning(_LW('image %(id)s at (%(base_file)s): warning '
                                 '-- an absent base file is in use! '
                                 'instances: %(instance_list)s'),
                                {'id': img_id,
                                 'base_file': base_file,
                                 'instance_list': ' '.join(instances)})

        if base_file:
            if not image_in_use:
                LOG.debug('image %(id)s at (%(base_file)s): image is not in '
                          'use',
                          {'id': img_id,
                           'base_file': base_file})
                self.removable_base_files.append(base_file)

            else:
                LOG.debug('image %(id)s at (%(base_file)s): image is in '
                          'use',
                          {'id': img_id,
                           'base_file': base_file})
                if os.path.exists(base_file):
                    libvirt_utils.update_mtime(base_file)
Example #28
    def _create_config_drive(self, instance, injected_files, admin_password,
                             network_info):
        if CONF.config_drive_format != 'iso9660':
            raise vmutils.UnsupportedConfigDriveFormatException(
                _('Invalid config_drive_format "%s"') %
                CONF.config_drive_format)

        LOG.info(_LI('Using config drive for instance'), instance=instance)

        extra_md = {}
        if admin_password and CONF.hyperv.config_drive_inject_password:
            extra_md['admin_pass'] = admin_password

        inst_md = instance_metadata.InstanceMetadata(instance,
                                                     content=injected_files,
                                                     extra_md=extra_md,
                                                     network_info=network_info)

        instance_path = self._pathutils.get_instance_dir(
            instance.name)
        configdrive_path_iso = os.path.join(instance_path, 'configdrive.iso')
        LOG.info(_LI('Creating config drive at %(path)s'),
                 {'path': configdrive_path_iso}, instance=instance)

        with configdrive.ConfigDriveBuilder(instance_md=inst_md) as cdb:
            try:
                cdb.make_drive(configdrive_path_iso)
            except processutils.ProcessExecutionError as e:
                with excutils.save_and_reraise_exception():
                    LOG.error(_LE('Creating config drive failed with '
                                  'error: %s'),
                              e, instance=instance)

        if not CONF.hyperv.config_drive_cdrom:
            configdrive_path = os.path.join(instance_path,
                                            'configdrive.vhd')
            utils.execute(CONF.hyperv.qemu_img_cmd,
                          'convert',
                          '-f',
                          'raw',
                          '-O',
                          'vpc',
                          configdrive_path_iso,
                          configdrive_path,
                          attempts=1)
            self._pathutils.remove(configdrive_path_iso)
        else:
            configdrive_path = configdrive_path_iso

        return configdrive_path
Example #29
File: report.py Project: taget/nova
    def _update_inventory(self, compute_node):
        """Update the inventory for this compute node if needed.

        :param compute_node: The objects.ComputeNode for the operation
        :returns: True if the inventory was updated (or did not need to be),
                  False otherwise.
        """
        url = '/resource_providers/%s/inventories' % compute_node.uuid
        data = self._compute_node_inventory(compute_node)
        result = self.put(url, data)
        if result.status_code == 409:
            # Generation fail, re-poll and then re-try
            del self._resource_providers[compute_node.uuid]
            self._ensure_resource_provider(
                compute_node.uuid, compute_node.hypervisor_hostname)
            LOG.info(_LI('Retrying update inventory for %s'),
                     compute_node.uuid)
            # Regenerate the body with the new generation
            data = self._compute_node_inventory(compute_node)
            result = self.put(url, data)
        elif not result:
            LOG.warning(_LW('Failed to update inventory for '
                            '%(uuid)s: %(status)i %(text)s'),
                        {'uuid': compute_node.uuid,
                         'status': result.status_code,
                         'text': result.text})
            return False

        generation = data['resource_provider_generation']
        if result.status_code == 200:
            self._resource_providers[compute_node.uuid].generation = (
                generation + 1)
            LOG.debug('Updated inventory for %s at generation %i' % (
                compute_node.uuid, generation))
            return True
        elif result.status_code == 409:
            LOG.info(_LI('Double generation clash updating inventory '
                         'for %(uuid)s at generation %(gen)i'),
                     {'uuid': compute_node.uuid,
                      'gen': generation})
            return False

        LOG.info(_LI('Received unexpected response code %(code)i while '
                     'trying to update inventory for compute node %(uuid)s '
                     'at generation %(gen)i: %(text)s'),
                 {'uuid': compute_node.uuid,
                  'code': result.status_code,
                  'gen': generation,
                  'text': result.text})
        return False
Example #30
def _match_forced_nodes(host_map, nodes_to_force):
    forced_nodes = []
    for (hostname, nodename) in list(host_map.keys()):
        if nodename not in nodes_to_force:
            del host_map[(hostname, nodename)]
        else:
            forced_nodes.append(nodename)
    if host_map:
        forced_nodes_str = ', '.join(forced_nodes)
        msg = _LI('Host filter forcing available nodes to %s')
    else:
        forced_nodes_str = ', '.join(nodes_to_force)
        msg = _LI("No nodes matched due to not matching "
                  "'force_nodes' value of '%s'")
    LOG.info(msg % forced_nodes_str)
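Illustrative usage with hypothetical data, assuming _match_forced_nodes and its LOG/_LI dependencies are importable: host_map is pruned in place so that only entries whose nodename appears in nodes_to_force survive.

host_map = {('host1', 'node-a'): object(), ('host2', 'node-b'): object()}
_match_forced_nodes(host_map, ['node-a'])
print(sorted(host_map))   # [('host1', 'node-a')]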
Example #31
    def _delete_inventory(self, rp_uuid):
        """Deletes all inventory records for a resource provider with the
        supplied UUID.

        First attempt to DELETE the inventory using microversion 1.5. If
        this results in a 406, fail over to a PUT.
        """
        curr = self._get_inventory_and_update_provider_generation(rp_uuid)

        # Check to see if we need to update placement's view
        if not curr.get('inventories', {}):
            msg = "No inventory to delete from resource provider %s."
            LOG.debug(msg, rp_uuid)
            return

        msg = _LI("Compute node %s reported no inventory but previous "
                  "inventory was detected. Deleting existing inventory "
                  "records.")
        LOG.info(msg, rp_uuid)

        url = '/resource_providers/%s/inventories' % rp_uuid
        r = self.delete(url, version="1.5")
        placement_req_id = get_placement_request_id(r)
        cur_rp_gen = self._resource_providers[rp_uuid]['generation']
        msg_args = {
            'rp_uuid': rp_uuid,
            'placement_req_id': placement_req_id,
        }
        if r.status_code == 406:
            # microversion 1.5 not available so try the earlier way
            # TODO(cdent): When we're happy that all placement
            # servers support microversion 1.5 we can remove this
            # call and the associated code.
            LOG.debug('Falling back to placement API microversion 1.0 '
                      'for deleting all inventory for a resource provider.')
            payload = {
                'resource_provider_generation': cur_rp_gen,
                'inventories': {},
            }
            r = self.put(url, payload)
            placement_req_id = get_placement_request_id(r)
            msg_args['placement_req_id'] = placement_req_id
            if r.status_code == 200:
                # Update our view of the generation for next time
                updated_inv = r.json()
                new_gen = updated_inv['resource_provider_generation']

                self._resource_providers[rp_uuid]['generation'] = new_gen
                msg_args['generation'] = new_gen
                LOG.info(_LI("[%(placement_req_id)s] Deleted all inventory "
                             "for resource provider %(rp_uuid)s at generation "
                             "%(generation)i."),
                         msg_args)
                return

        if r.status_code == 204:
            self._resource_providers[rp_uuid]['generation'] = cur_rp_gen + 1
            LOG.info(_LI("[%(placement_req_id)s] Deleted all inventory for "
                         "resource provider %(rp_uuid)s."),
                     msg_args)
            return
        elif r.status_code == 404:
            # This can occur if another thread deleted the inventory and the
            # resource provider already
            LOG.debug("[%(placement_req_id)s] Resource provider %(rp_uuid)s "
                      "deleted by another thread when trying to delete "
                      "inventory. Ignoring.",
                      msg_args)
            self._resource_providers.pop(rp_uuid, None)
            self._provider_aggregate_map.pop(rp_uuid, None)
            return
        elif r.status_code == 409:
            rc_str = _extract_inventory_in_use(r.text)
            if rc_str is not None:
                msg = _LW("[%(placement_req_id)s] We cannot delete inventory "
                          "%(rc_str)s for resource provider %(rp_uuid)s "
                          "because the inventory is in use.")
                msg_args['rc_str'] = rc_str
                LOG.warning(msg, msg_args)
                return

        msg = _LE("[%(placement_req_id)s] Failed to delete inventory for "
                  "resource provider %(rp_uuid)s. Got error response: "
                  "%(err)s.")
        msg_args['err'] = r.text
        LOG.error(msg, msg_args)
Example #32
        def delayed_create():
            """This handles the fetching and decrypting of the part files."""
            context.update_store()
            log_vars = {
                'image_location': image_location,
                'image_path': image_path
            }

            def _update_image_state(context, image_uuid, image_state):
                metadata = {'properties': {'image_state': image_state}}
                self.service.update(context,
                                    image_uuid,
                                    metadata,
                                    purge_props=False)

            def _update_image_data(context, image_uuid, image_data):
                metadata = {}
                self.service.update(context,
                                    image_uuid,
                                    metadata,
                                    image_data,
                                    purge_props=False)

            try:
                _update_image_state(context, image_uuid, 'downloading')

                try:
                    parts = []
                    elements = manifest.find('image').getiterator('filename')
                    for fn_element in elements:
                        part = self._download_file(bucket, fn_element.text,
                                                   image_path)
                        parts.append(part)

                    # NOTE(vish): this may be suboptimal, should we use cat?
                    enc_filename = os.path.join(image_path, 'image.encrypted')
                    with open(enc_filename, 'w') as combined:
                        for filename in parts:
                            with open(filename) as part:
                                shutil.copyfileobj(part, combined)

                except Exception:
                    LOG.exception(
                        _LE("Failed to download %(image_location)s "
                            "to %(image_path)s"), log_vars)
                    _update_image_state(context, image_uuid, 'failed_download')
                    return

                _update_image_state(context, image_uuid, 'decrypting')

                try:
                    hex_key = manifest.find('image/ec2_encrypted_key').text
                    encrypted_key = binascii.a2b_hex(hex_key)
                    hex_iv = manifest.find('image/ec2_encrypted_iv').text
                    encrypted_iv = binascii.a2b_hex(hex_iv)

                    dec_filename = os.path.join(image_path, 'image.tar.gz')
                    self._decrypt_image(context, enc_filename, encrypted_key,
                                        encrypted_iv, dec_filename)
                except Exception:
                    LOG.exception(
                        _LE("Failed to decrypt %(image_location)s "
                            "to %(image_path)s"), log_vars)
                    _update_image_state(context, image_uuid, 'failed_decrypt')
                    return

                _update_image_state(context, image_uuid, 'untarring')

                try:
                    unz_filename = self._untarzip_image(
                        image_path, dec_filename)
                except Exception:
                    LOG.exception(
                        _LE("Failed to untar %(image_location)s "
                            "to %(image_path)s"), log_vars)
                    _update_image_state(context, image_uuid, 'failed_untar')
                    return

                _update_image_state(context, image_uuid, 'uploading')
                try:
                    with open(unz_filename) as image_file:
                        _update_image_data(context, image_uuid, image_file)
                except Exception:
                    LOG.exception(
                        _LE("Failed to upload %(image_location)s "
                            "to %(image_path)s"), log_vars)
                    _update_image_state(context, image_uuid, 'failed_upload')
                    return

                metadata = {
                    'status': 'active',
                    'properties': {
                        'image_state': 'available'
                    }
                }
                self.service.update(context,
                                    image_uuid,
                                    metadata,
                                    purge_props=False)

                shutil.rmtree(image_path)
            except exception.ImageNotFound:
                LOG.info(_LI("Image %s was deleted underneath us"), image_uuid)
                return
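
Each stage above follows the same shape: record an in-progress image_state, run the step, and on failure record a stage-specific failure state and stop. A minimal sketch of that pattern; run_pipeline() and the step tuples are illustrative names, not part of the original code:

def run_pipeline(steps, mark_state):
    # Hypothetical distillation of the flow above. `steps` is a list of
    # (state, fail_state, fn) tuples; `mark_state` records the current
    # image_state. A failing step records its failure state and aborts.
    for state, fail_state, fn in steps:
        mark_state(state)
        try:
            fn()
        except Exception:
            mark_state(fail_state)
            return False
    return True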
Example #33
0
    def schedule_run_instance(self, context, request_spec, admin_password,
                              injected_files, requested_networks,
                              is_first_time, filter_properties,
                              legacy_bdm_in_spec):
        """Provisions instances that needs to be scheduled

        Applies filters and weighters on request properties to get a list of
        compute hosts and calls them to spawn instance(s).
        """
        payload = dict(request_spec=request_spec)
        self.notifier.info(context, 'scheduler.run_instance.start', payload)

        instance_uuids = request_spec.get('instance_uuids')
        LOG.info(
            _LI("Attempting to build %(num_instances)d instance(s) "
                "uuids: %(instance_uuids)s"), {
                    'num_instances': len(instance_uuids),
                    'instance_uuids': instance_uuids
                })
        LOG.debug("Request Spec: %s" % request_spec)

        # check retry policy.  Rather ugly use of instance_uuids[0]...
        # but if we've exceeded max retries... then we really only
        # have a single instance.
        scheduler_utils.populate_retry(filter_properties, instance_uuids[0])
        weighed_hosts = self._schedule(context, request_spec,
                                       filter_properties)

        # NOTE: Pop instance_uuids as individual creates do not need the
        # set of uuids. Do not pop before here, as the outer exception
        # handler for NoValidHost needs the uuids to set the error state.
        instance_uuids = request_spec.pop('instance_uuids')

        # NOTE(comstud): Make sure we do not pass this through.  It
        # contains an instance of RpcContext that cannot be serialized.
        filter_properties.pop('context', None)

        for num, instance_uuid in enumerate(instance_uuids):
            request_spec['instance_properties']['launch_index'] = num

            try:
                try:
                    weighed_host = weighed_hosts.pop(0)
                    LOG.info(
                        _LI("Choosing host %(weighed_host)s "
                            "for instance %(instance_uuid)s"), {
                                'weighed_host': weighed_host,
                                'instance_uuid': instance_uuid
                            })
                except IndexError:
                    raise exception.NoValidHost(reason="")

                self._provision_resource(context,
                                         weighed_host,
                                         request_spec,
                                         filter_properties,
                                         requested_networks,
                                         injected_files,
                                         admin_password,
                                         is_first_time,
                                         instance_uuid=instance_uuid,
                                         legacy_bdm_in_spec=legacy_bdm_in_spec)
            except Exception as ex:
                # NOTE(vish): we don't reraise the exception here to make sure
                #             that all instances in the request get set to
                #             error properly
                driver.handle_schedule_error(context, ex, instance_uuid,
                                             request_spec)
            # scrub retry host list in case we're scheduling multiple
            # instances:
            retry = filter_properties.get('retry', {})
            retry['hosts'] = []

        self.notifier.info(context, 'scheduler.run_instance.end', payload)
Example #34
0
    def _create_virtual_nic(self, instance, image_info, network_info, vm_ref):
        if network_info is None:
            return
        vif_model = image_info.vif_model
        if not vif_model:
            vif_model = "VirtualE1000"
        vif_infos = []
        for vif in network_info:
            portgroup_name = []
            mac_address = vif['address']
            network_id = vif['network']['id']
            portgroup_name.append(network_id)
            network_id_cluster_id = (network_id + "-" +
                                     self._get_mo_id_from_instance(instance))
            portgroup_name.append(network_id_cluster_id)
            # wait for port group creation (if not present) by neutron agent.
            network_ref = self._wait_and_get_portgroup_details(
                self._session, vm_ref, portgroup_name)
            if not network_ref:
                msg = ("Portgroup %(vlan)s (or) Portgroup %(vxlan)s.", {
                    'vlan': network_id,
                    'vxlan': network_id_cluster_id
                })
                raise exception.NetworkNotCreated(msg)
            vif_infos.append({
                'network_name': network_id_cluster_id,
                'mac_address': mac_address,
                'network_ref': network_ref,
                'iface_id': vif['id'],
                'vif_model': vif_model
            })

        config_spec = self.client_factory.create('ns0:'
                                                 'VirtualMachineConfigSpec')
        vif_spec_list = []
        for vif_info in vif_infos:
            vif_spec = vm_util._create_vif_spec(self.client_factory, vif_info)
            vif_spec_list.append(vif_spec)

        config_spec.deviceChange = vif_spec_list

        # add vm-uuid and iface-id.x values for Neutron.
        extra_config = []
        i = 0
        for vif_info in vif_infos:
            if vif_info['iface_id']:
                opt = self.client_factory.create('ns0:OptionValue')
                opt.key = "nvp.iface-id.%d" % i
                opt.value = vif_info['iface_id']
                extra_config.append(opt)
                i += 1

        config_spec.extraConfig = extra_config

        LOG.info(_LI("Reconfiguring VM instance to attach NIC."))
        reconfig_task = self._session._call_method(self._session.vim,
                                                   "ReconfigVM_Task",
                                                   vm_ref,
                                                   spec=config_spec)

        self._session._wait_for_task(reconfig_task)
        LOG.info(_LI("Reconfigured VM instance to attach NIC."))
Example #35
0
    def _update_usage_from_migration(self, context, instance, migration):
        """Update usage for a single migration.  The record may
        represent an incoming or outbound migration.
        """
        if not self._is_trackable_migration(migration):
            return

        uuid = migration.instance_uuid
        LOG.info(_LI("Updating from migration %s"), uuid)

        incoming = (migration.dest_compute == self.host and
                    migration.dest_node == self.nodename)
        outbound = (migration.source_compute == self.host and
                    migration.source_node == self.nodename)
        same_node = (incoming and outbound)

        record = self.tracked_instances.get(uuid, None)
        itype = None
        numa_topology = None
        sign = 0
        if same_node:
            # same node resize. record usage for whichever instance type the
            # instance is *not* in:
            if (instance['instance_type_id'] ==
                    migration.old_instance_type_id):
                itype = self._get_instance_type(context, instance, 'new_',
                        migration)
                numa_topology = self._get_migration_context_resource(
                    'numa_topology', instance)
                # Allocate pci device(s) for the instance.
                sign = 1
            else:
                # instance record already has new flavor, hold space for a
                # possible revert to the old instance type:
                itype = self._get_instance_type(context, instance, 'old_',
                        migration)
                numa_topology = self._get_migration_context_resource(
                    'numa_topology', instance, prefix='old_')

        elif incoming and not record:
            # instance has not yet migrated here:
            itype = self._get_instance_type(context, instance, 'new_',
                    migration)
            numa_topology = self._get_migration_context_resource(
                'numa_topology', instance)
            # Allocate pci device(s) for the instance.
            sign = 1

        elif outbound and not record:
            # instance migrated, but record usage for a possible revert:
            itype = self._get_instance_type(context, instance, 'old_',
                    migration)
            numa_topology = self._get_migration_context_resource(
                'numa_topology', instance, prefix='old_')

        if itype:
            usage = self._get_usage_dict(
                        itype, numa_topology=numa_topology)
            if self.pci_tracker and sign:
                self.pci_tracker.update_pci_for_instance(
                    context, instance, sign=sign)
            self._update_usage(usage)
            if self.pci_tracker:
                obj = self.pci_tracker.stats.to_device_pools_obj()
                self.compute_node.pci_device_pools = obj
            else:
                obj = objects.PciDevicePoolList()
                self.compute_node.pci_device_pools = obj
            self.tracked_migrations[uuid] = (migration, itype)
Example #36
0
    def _consolidate_vmdk_volume(self,
                                 instance,
                                 vm_ref,
                                 device,
                                 volume_ref,
                                 adapter_type=None,
                                 disk_type=None):
        """Consolidate volume backing VMDK files if needed.

        The volume's VMDK file attached to an instance can be moved by SDRS
        if it is enabled on the cluster.
        As a result, the VMDK file can be copied onto another datastore, and
        the copy in the new location becomes the latest version of the file.
        So at detach time we need to consolidate the current backing VMDK
        file with the VMDK file in the new location.

        We need to ensure that the VMDK chain (snapshots) remains intact
        during the consolidation. SDRS retains the chain when it copies VMDK
        files over, so for consolidation we relocate the backing with the
        move option moveAllDiskBackingsAndAllowSharing and then delete the
        older version of the VMDK file, attaching the new version instead.

        In the case of a volume boot, we need to ensure that the volume is
        on the same datastore as the instance.
        """

        original_device = self._get_vmdk_base_volume_device(volume_ref)

        original_device_path = original_device.backing.fileName
        current_device_path = device.backing.fileName

        if original_device_path == current_device_path:
            # The volume is not moved from its original location.
            # No consolidation is required.
            LOG.debug(
                "The volume has not been displaced from "
                "its original location: %s. No consolidation "
                "needed.", current_device_path)
            return

        # The volume has been moved from its original location.
        # Need to consolidate the VMDK files.
        LOG.info(
            _LI("The volume's backing has been relocated to %s. Need to "
                "consolidate backing disk file."), current_device_path)

        # Pick the resource pool on which the instance resides.
        # Move the volume to the datastore where the new VMDK file is present.
        res_pool = self._get_res_pool_of_vm(vm_ref)
        datastore = device.backing.datastore
        self._relocate_vmdk_volume(volume_ref, res_pool, datastore)

        # Delete the original disk from the volume_ref
        self.detach_disk_from_vm(volume_ref,
                                 instance,
                                 original_device,
                                 destroy_disk=True)

        # Attach the current volume to the volume_ref
        self.attach_disk_to_vm(volume_ref,
                               instance,
                               adapter_type,
                               disk_type,
                               vmdk_path=current_device_path)
    def host_maintenance_mode(self, host, mode):
        """Start/Stop host maintenance window. On start, it triggers
        guest VMs evacuation.
        """
        if not mode:
            return 'off_maintenance'
        host_list = [
            host_ref for host_ref in self._session.host.get_all()
            if host_ref != self._session.host_ref
        ]
        migrations_counter = vm_counter = 0
        ctxt = context.get_admin_context()
        for vm_ref, vm_rec in vm_utils.list_vms(self._session):
            for host_ref in host_list:
                try:
                    # Ensure only guest instances are migrated
                    uuid = vm_rec['other_config'].get('nova_uuid')
                    if not uuid:
                        name = vm_rec['name_label']
                        uuid = _uuid_find(ctxt, host, name)
                        if not uuid:
                            LOG.info(
                                _LI('Instance %(name)s running on '
                                    '%(host)s could not be found in '
                                    'the database: assuming it is a '
                                    'worker VM and skipping migration '
                                    'to a new host'), {
                                        'name': name,
                                        'host': host
                                    })
                            continue
                    instance = objects.Instance.get_by_uuid(ctxt, uuid)
                    vm_counter = vm_counter + 1

                    aggregate = objects.AggregateList.get_by_host(
                        ctxt, host, key=pool_states.POOL_FLAG)
                    if not aggregate:
                        msg = _('Aggregate for host %(host)s could not be'
                                ' found.') % dict(host=host)
                        raise exception.NotFound(msg)

                    dest = _host_find(ctxt, self._session, aggregate[0],
                                      host_ref)
                    instance.host = dest
                    instance.task_state = task_states.MIGRATING
                    instance.save()

                    self._session.VM.pool_migrate(vm_ref, host_ref,
                                                  {"live": "true"})
                    migrations_counter = migrations_counter + 1

                    instance.vm_state = vm_states.ACTIVE
                    instance.save()

                    break
                except self._session.XenAPI.Failure:
                    LOG.exception(
                        _LE('Unable to migrate VM %(vm_ref)s '
                            'from %(host)s'), {
                                'vm_ref': vm_ref,
                                'host': host
                            })
                    instance.host = host
                    instance.vm_state = vm_states.ACTIVE
                    instance.save()

        if vm_counter == migrations_counter:
            return 'on_maintenance'
        else:
            raise exception.NoValidHost(reason='Unable to find suitable '
                                        'host for VMs evacuation')
Example #38
0
    def _consolidate_vmdk_volume(self, instance, vm_ref, device, volume_ref,
                                 adapter_type=None, disk_type=None):
        """Consolidate volume backing VMDK files if needed.

        The volume's VMDK file attached to an instance can be moved by SDRS
        if it is enabled on the cluster.
        As a result, the VMDK file can be copied onto another datastore, and
        the copy in the new location becomes the latest version of the file.
        So at detach time we need to consolidate the current backing VMDK
        file with the VMDK file in the new location.

        We need to ensure that the VMDK chain (snapshots) remains intact
        during the consolidation. SDRS retains the chain when it copies VMDK
        files over, so for consolidation we relocate the backing with the
        move option moveAllDiskBackingsAndAllowSharing and then delete the
        older version of the VMDK file, attaching the new version instead.

        In the case of a volume boot, we need to ensure that the volume is
        on the same datastore as the instance.
        """

        original_device = self._get_vmdk_base_volume_device(volume_ref)

        original_device_path = original_device.backing.fileName
        current_device_path = device.backing.fileName

        if original_device_path == current_device_path:
            # The volume is not moved from its original location.
            # No consolidation is required.
            LOG.debug("The volume has not been displaced from "
                      "its original location: %s. No consolidation "
                      "needed.", current_device_path)
            return

        # The volume has been moved from its original location.
        # Need to consolidate the VMDK files.
        LOG.info(_LI("The volume's backing has been relocated to %s. Need to "
                     "consolidate backing disk file."), current_device_path)

        # Pick the host and resource pool on which the instance resides.
        # Move the volume to the datastore where the new VMDK file is present.
        host = self._get_host_of_vm(vm_ref)
        res_pool = self._get_res_pool_of_host(host)
        datastore = device.backing.datastore
        detached = False
        LOG.debug("Relocating volume's backing: %(backing)s to resource "
                  "pool: %(rp)s, datastore: %(ds)s, host: %(host)s.",
                  {'backing': volume_ref, 'rp': res_pool, 'ds': datastore,
                   'host': host})
        try:
            self._relocate_vmdk_volume(volume_ref, res_pool, datastore, host)
        except oslo_vmw_exceptions.FileNotFoundException:
            # Volume's vmdk was moved; remove the device so that we can
            # relocate the volume.
            LOG.warn(_LW("Virtual disk: %s of volume's backing not found."),
                     original_device_path, exc_info=True)
            LOG.debug("Removing disk device of volume's backing and "
                      "reattempting relocate.")
            self.detach_disk_from_vm(volume_ref, instance, original_device)
            detached = True
            self._relocate_vmdk_volume(volume_ref, res_pool, datastore, host)

        # Volume's backing is relocated now; detach the old vmdk if not done
        # already.
        if not detached:
            self.detach_disk_from_vm(volume_ref, instance, original_device,
                                     destroy_disk=True)

        # Attach the current volume to the volume_ref
        self.attach_disk_to_vm(volume_ref, instance,
                               adapter_type, disk_type,
                               vmdk_path=current_device_path)
Example #39
0
    def __init__(self, init_only=None, v3mode=False):
        # TODO(cyeoh): bp v3-api-extension-framework. Currently load
        # all extensions but eventually should be able to exclude
        # based on a config file
        # TODO(oomichi): We can remove v3mode argument after moving all v3 APIs
        # to v2.1.
        def _check_load_extension(ext):
            if (self.init_only is None or ext.obj.alias in
                self.init_only) and isinstance(ext.obj,
                                               extensions.V3APIExtensionBase):

                # Check that the whitelist is either empty or, if it is not,
                # that the extension is in the whitelist
                if (not CONF.osapi_v3.extensions_whitelist or
                        ext.obj.alias in CONF.osapi_v3.extensions_whitelist):

                    # Check the extension is not in the blacklist
                    if ext.obj.alias not in CONF.osapi_v3.extensions_blacklist:
                        return self._register_extension(ext)
                    else:
                        LOG.warning(_LW("Not loading %s because it is "
                                        "in the blacklist"), ext.obj.alias)
                        return False
                else:
                    LOG.warning(
                        _LW("Not loading %s because it is not in the "
                            "whitelist"), ext.obj.alias)
                    return False
            else:
                return False

        if not CONF.osapi_v3.enabled:
            LOG.info(_LI("V3 API has been disabled by configuration"))
            return

        self.init_only = init_only
        LOG.debug("v3 API Extension Blacklist: %s",
                  CONF.osapi_v3.extensions_blacklist)
        LOG.debug("v3 API Extension Whitelist: %s",
                  CONF.osapi_v3.extensions_whitelist)

        in_blacklist_and_whitelist = set(
            CONF.osapi_v3.extensions_whitelist).intersection(
                CONF.osapi_v3.extensions_blacklist)
        if len(in_blacklist_and_whitelist) != 0:
            LOG.warning(_LW("Extensions in both blacklist and whitelist: %s"),
                        list(in_blacklist_and_whitelist))

        self.api_extension_manager = stevedore.enabled.EnabledExtensionManager(
            namespace=self.api_extension_namespace(),
            check_func=_check_load_extension,
            invoke_on_load=True,
            invoke_kwds={"extension_info": self.loaded_extension_info})

        if v3mode:
            mapper = PlainMapper()
        else:
            mapper = ProjectMapper()

        self.resources = {}

        # NOTE(cyeoh): Core API support is rewritten as extensions,
        # but conceptually we still have a core
        if list(self.api_extension_manager):
            # NOTE(cyeoh): Stevedore raises an exception if there are
            # no plugins detected. I wonder if this is a bug.
            self._register_resources_check_inherits(mapper)
            self.api_extension_manager.map(self._register_controllers)

        missing_core_extensions = self.get_missing_core_extensions(
            self.loaded_extension_info.get_extensions().keys())
        if not self.init_only and missing_core_extensions:
            LOG.critical(_LC("Missing core API extensions: %s"),
                         missing_core_extensions)
            raise exception.CoreAPIMissing(
                missing_apis=missing_core_extensions)

        super(APIRouterV21, self).__init__(mapper)
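
The whitelist/blacklist decision inside _check_load_extension reduces to two membership checks. A minimal sketch, where should_load() is an assumed helper name rather than anything in the example:

def should_load(alias, whitelist, blacklist):
    # Mirrors the checks above: an empty whitelist allows every extension
    # that is not blacklisted; a non-empty whitelist additionally requires
    # the alias to be listed in it.
    if whitelist and alias not in whitelist:
        return False
    return alias not in blacklist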
Example #40
0
    def start(self):
        """Start the service.

        This includes starting an RPC service, initializing
        periodic tasks, etc.
        """
        # NOTE(melwitt): Clear the cell cache holding database transaction
        # context manager objects. We do this to ensure we create new internal
        # oslo.db locks to avoid a situation where a child process receives an
        # already locked oslo.db lock when it is forked. When a child process
        # inherits a locked oslo.db lock, database accesses through that
        # transaction context manager will never be able to acquire the lock
        # and requests will fail with CellTimeout errors.
        # See https://bugs.python.org/issue6721 for more information.
        # With python 3.7, it would be possible for oslo.db to make use of the
        # os.register_at_fork() method to reinitialize its lock. Until we
        # require python 3.7 as a minimum version, we must handle the situation
        # outside of oslo.db.
        context.CELL_CACHE = {}

        assert_eventlet_uses_monotonic_clock()

        verstr = version.version_string_with_package()
        LOG.info(_LI('Starting %(topic)s node (version %(version)s)'), {
            'topic': self.topic,
            'version': verstr
        })
        self.basic_config_check()
        self.manager.init_host()
        self.model_disconnected = False
        ctxt = context.get_admin_context()
        self.service_ref = objects.Service.get_by_host_and_binary(
            ctxt, self.host, self.binary)
        if self.service_ref:
            _update_service_ref(self.service_ref)

        else:
            try:
                self.service_ref = _create_service_ref(self, ctxt)
            except (exception.ServiceTopicExists,
                    exception.ServiceBinaryExists):
                # NOTE(danms): If we race to create a record with a sibling
                # worker, don't fail here.
                self.service_ref = objects.Service.get_by_host_and_binary(
                    ctxt, self.host, self.binary)

        self.manager.pre_start_hook()

        if self.backdoor_port is not None:
            self.manager.backdoor_port = self.backdoor_port

        LOG.debug("Creating RPC server for service %s", self.topic)

        target = messaging.Target(topic=self.topic, server=self.host)

        endpoints = [
            self.manager,
            baserpc.BaseRPCAPI(self.manager.service_name, self.backdoor_port)
        ]
        endpoints.extend(self.manager.additional_endpoints)

        serializer = objects_base.NovaObjectSerializer()

        self.rpcserver = rpc.get_server(target, endpoints, serializer)
        self.rpcserver.start()

        self.manager.post_start_hook()

        LOG.debug("Join ServiceGroup membership for this service %s",
                  self.topic)
        # Add service to the ServiceGroup membership group.
        self.servicegroup_api.join(self.host, self.topic, self)

        if self.periodic_enable:
            if self.periodic_fuzzy_delay:
                initial_delay = random.randint(0, self.periodic_fuzzy_delay)
            else:
                initial_delay = None

            self.tg.add_dynamic_timer(
                self.periodic_tasks,
                initial_delay=initial_delay,
                periodic_interval_max=self.periodic_interval_max)
Example #41
0
    def __init__(self,
                 name,
                 app,
                 host='0.0.0.0',
                 port=0,
                 pool_size=None,
                 protocol=eventlet.wsgi.HttpProtocol,
                 backlog=128,
                 use_ssl=False,
                 max_url_len=None):
        """Initialize, but do not start, a WSGI server.

        :param name: Pretty name for logging.
        :param app: The WSGI application to serve.
        :param host: IP address to serve the application.
        :param port: Port number on which to serve the application.
        :param pool_size: Maximum number of eventlets to spawn concurrently.
        :param backlog: Maximum number of queued connections.
        :param max_url_len: Maximum length of permitted URLs.
        :returns: None
        :raises: nova.exception.InvalidInput
        """
        # Allow operators to customize http requests max header line size.
        eventlet.wsgi.MAX_HEADER_LINE = CONF.wsgi.max_header_line
        self.name = name
        self.app = app
        self._server = None
        self._protocol = protocol
        self.pool_size = pool_size or self.default_pool_size
        self._pool = eventlet.GreenPool(self.pool_size)
        self._logger = logging.getLogger("nova.%s.wsgi.server" % self.name)
        self._use_ssl = use_ssl
        self._max_url_len = max_url_len
        self.client_socket_timeout = CONF.wsgi.client_socket_timeout or None

        if backlog < 1:
            raise exception.InvalidInput(
                reason=_('The backlog must be more than 0'))

        bind_addr = (host, port)
        # TODO(dims): eventlet's green dns/socket module does not actually
        # support IPv6 in getaddrinfo(). We need to get around this in the
        # future or monitor upstream for a fix
        try:
            info = socket.getaddrinfo(bind_addr[0], bind_addr[1],
                                      socket.AF_UNSPEC, socket.SOCK_STREAM)[0]
            family = info[0]
            bind_addr = info[-1]
        except Exception:
            family = socket.AF_INET

        try:
            self._socket = eventlet.listen(bind_addr, family, backlog=backlog)
        except EnvironmentError:
            LOG.error(_LE("Could not bind to %(host)s:%(port)s"), {
                'host': host,
                'port': port
            })
            raise

        (self.host, self.port) = self._socket.getsockname()[0:2]
        LOG.info(_LI("%(name)s listening on %(host)s:%(port)s"), {
            'name': self.name,
            'host': self.host,
            'port': self.port
        })
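
The address-family detection above can be isolated into a small helper; a minimal sketch under that assumption, with resolve_bind_addr() being an illustrative name:

import socket


def resolve_bind_addr(host, port):
    # Mirrors the fallback above: if getaddrinfo() cannot resolve the
    # address (e.g. eventlet's green DNS without IPv6 support), default
    # to AF_INET and bind with the original (host, port) pair.
    try:
        info = socket.getaddrinfo(host, port,
                                  socket.AF_UNSPEC, socket.SOCK_STREAM)[0]
        return info[0], info[-1]
    except socket.gaierror:
        return socket.AF_INET, (host, port)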
Example #42
0
    def spawn(self,
              context,
              instance,
              image_meta,
              injected_files,
              admin_password,
              network_info=None,
              block_device_info=None,
              flavor=None):
        """Deploy an instance.

        :param context: The security context.
        :param instance: The instance object.
        :param image_meta: Image dict returned by nova.image.glance
            that defines the image from which to boot this instance.
        :param injected_files: User files to inject into instance. Ignored
            by this driver.
        :param admin_password: Administrator password to set in
            instance. Ignored by this driver.
        :param network_info: Instance network information.
        :param block_device_info: Instance block device
            information. Ignored by this driver.
        :param flavor: The flavor for the instance to be spawned.
        """
        # The compute manager is meant to know the node uuid, so missing uuid
        # is a significant issue. It may mean we've been passed the wrong data.
        node_uuid = instance.get('node')
        if not node_uuid:
            raise ironic.exc.BadRequest(
                _("Ironic node uuid not supplied to "
                  "driver for instance %s.") % instance.uuid)

        node = self.ironicclient.call("node.get", node_uuid)
        flavor = objects.Flavor.get_by_id(context, instance.instance_type_id)

        self._add_driver_fields(node, instance, image_meta, flavor)

        # NOTE(Shrews): The default ephemeral device needs to be set for
        # services (like cloud-init) that depend on it being returned by the
        # metadata server. Addresses bug https://launchpad.net/bugs/1324286.
        if flavor['ephemeral_gb']:
            instance.default_ephemeral_device = '/dev/sda1'
            instance.save()

        # validate we are ready to do the deploy
        validate_chk = self.ironicclient.call("node.validate", node_uuid)
        if not validate_chk.deploy or not validate_chk.power:
            # something is wrong. undo what we have done
            self._cleanup_deploy(context,
                                 node,
                                 instance,
                                 network_info,
                                 flavor=flavor)
            raise exception.ValidationError(
                _("Ironic node: %(id)s failed to validate."
                  " (deploy: %(deploy)s, power: %(power)s)") % {
                      'id': node.uuid,
                      'deploy': validate_chk.deploy,
                      'power': validate_chk.power
                  })

        # prepare for the deploy
        try:
            self._plug_vifs(node, instance, network_info)
            self._start_firewall(instance, network_info)
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.error(
                    _LE("Error preparing deploy for instance "
                        "%(instance)s on baremetal node %(node)s."), {
                            'instance': instance.uuid,
                            'node': node_uuid
                        })
                self._cleanup_deploy(context,
                                     node,
                                     instance,
                                     network_info,
                                     flavor=flavor)

        # Config drive
        configdrive_value = None
        if configdrive.required_by(instance):
            extra_md = {}
            if admin_password:
                extra_md['admin_pass'] = admin_password

            configdrive_value = self._generate_configdrive(instance,
                                                           node,
                                                           network_info,
                                                           extra_md=extra_md)

            LOG.info(
                _LI("Config drive for instance %(instance)s on "
                    "baremetal node %(node)s created."), {
                        'instance': instance['uuid'],
                        'node': node_uuid
                    })

        # trigger the node deploy
        try:
            self.ironicclient.call("node.set_provision_state",
                                   node_uuid,
                                   ironic_states.ACTIVE,
                                   configdrive=configdrive_value)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                msg = (_LE("Failed to request Ironic to provision instance "
                           "%(inst)s: %(reason)s"), {
                               'inst': instance.uuid,
                               'reason': six.text_type(e)
                           })
                LOG.error(msg)
                self._cleanup_deploy(context,
                                     node,
                                     instance,
                                     network_info,
                                     flavor=flavor)

        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
                                                     self.ironicclient,
                                                     instance)
        try:
            timer.start(interval=CONF.ironic.api_retry_interval).wait()
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.error(
                    _LE("Error deploying instance %(instance)s on "
                        "baremetal node %(node)s."), {
                            'instance': instance.uuid,
                            'node': node_uuid
                        })
                self.destroy(context, instance, network_info)
    def _update_available_resource(self, context, resources):

        # initialise the compute node object, creating it
        # if it does not already exist.
        self._init_compute_node(context, resources)

        # if we could not init the compute node the tracker will be
        # disabled and we should quit now
        if self.disabled:
            return

        if 'pci_passthrough_devices' in resources:
            # TODO(jaypipes): Move this into _init_compute_node()
            if not self.pci_tracker:
                n_id = self.compute_node['id'] if self.compute_node else None
                self.pci_tracker = pci_manager.PciDevTracker(context,
                                                             node_id=n_id)
            dev_json = resources.pop('pci_passthrough_devices')
            self.pci_tracker.update_devices_from_hypervisor_resources(dev_json)

        # Grab all instances assigned to this node:
        instances = objects.InstanceList.get_by_host_and_node(
            context,
            self.host,
            self.nodename,
            expected_attrs=[
                'system_metadata', 'numa_topology', 'flavor',
                'migration_context'
            ])

        # Now calculate usage based on instance utilization:
        self._update_usage_from_instances(context, instances)

        # Grab all in-progress migrations:
        migrations = objects.MigrationList.get_in_progress_by_host_and_node(
            context, self.host, self.nodename)

        self._pair_instances_to_migrations(migrations, instances)
        self._update_usage_from_migrations(context, migrations)

        # Detect and account for orphaned instances that may exist on the
        # hypervisor, but are not in the DB:
        orphans = self._find_orphaned_instances()
        self._update_usage_from_orphans(orphans)

        # NOTE(yjiang5): Because pci device tracker status is not cleared in
        # this periodic task, and also because the resource tracker is not
        # notified when instances are deleted, we need to remove all usages
        # from deleted instances.
        if self.pci_tracker:
            self.pci_tracker.clean_usage(instances, migrations, orphans)
            dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
            self.compute_node.pci_device_pools = dev_pools_obj
        else:
            self.compute_node.pci_device_pools = objects.PciDevicePoolList()

        self._report_final_resource_view()

        metrics = self._get_host_metrics(context, self.nodename)
        # TODO(pmurray): metrics should not be a json string in ComputeNode,
        # but it is. This should be changed in ComputeNode
        self.compute_node.metrics = jsonutils.dumps(metrics)

        # update the compute_node
        self._update(context)
        LOG.info(_LI('Compute_service record updated for %(host)s:%(node)s'), {
            'host': self.host,
            'node': self.nodename
        })
Example #44
0
    def set_disk_qos_specs(self, connection_info, disk_qos_specs):
        LOG.info(
            _LI("The %(protocol)s Hyper-V volume driver "
                "does not support QoS. Ignoring QoS specs."),
            dict(protocol=self._protocol))
Example #45
0
    def download(self, context, image_id, data=None, dst_path=None):
        """Calls out to Glance for data and writes data."""
        if CONF.glance.allowed_direct_url_schemes and dst_path is not None:
            image = self.show(context, image_id, include_locations=True)
            for entry in image.get('locations', []):
                loc_url = entry['url']
                loc_meta = entry['metadata']
                o = urlparse.urlparse(loc_url)
                xfer_mod = self._get_transfer_module(o.scheme)
                if xfer_mod:
                    try:
                        xfer_mod.download(context, o, dst_path, loc_meta)
                        LOG.info(_LI("Successfully transferred "
                                     "using %s"), o.scheme)
                        return
                    except Exception:
                        LOG.exception(_LE("Download image error"))

        try:
            image_chunks = self._client.call(context, 2, 'data', image_id)
        except Exception:
            _reraise_translated_image_exception(image_id)

        # Retrieve properties for verification of Glance image signature
        verifier = None
        if CONF.glance.verify_glance_signatures:
            image_meta_dict = self.show(context,
                                        image_id,
                                        include_locations=False)
            image_meta = objects.ImageMeta.from_dict(image_meta_dict)
            img_signature = image_meta.properties.get('img_signature')
            img_sig_hash_method = image_meta.properties.get(
                'img_signature_hash_method')
            img_sig_cert_uuid = image_meta.properties.get(
                'img_signature_certificate_uuid')
            img_sig_key_type = image_meta.properties.get(
                'img_signature_key_type')
            try:
                verifier = signature_utils.get_verifier(
                    context=context,
                    img_signature_certificate_uuid=img_sig_cert_uuid,
                    img_signature_hash_method=img_sig_hash_method,
                    img_signature=img_signature,
                    img_signature_key_type=img_sig_key_type,
                )
            except cursive_exception.SignatureVerificationError:
                with excutils.save_and_reraise_exception():
                    LOG.error(
                        _LE('Image signature verification failed '
                            'for image: %s'), image_id)

        close_file = False
        if data is None and dst_path:
            data = open(dst_path, 'wb')
            close_file = True

        if data is None:

            # Perform image signature verification
            if verifier:
                try:
                    for chunk in image_chunks:
                        verifier.update(chunk)
                    verifier.verify()

                    LOG.info(
                        _LI('Image signature verification succeeded '
                            'for image: %s'), image_id)

                except cryptography.exceptions.InvalidSignature:
                    with excutils.save_and_reraise_exception():
                        LOG.error(
                            _LE('Image signature verification failed '
                                'for image: %s'), image_id)
            return image_chunks
        else:
            try:
                for chunk in image_chunks:
                    if verifier:
                        verifier.update(chunk)
                    data.write(chunk)
                if verifier:
                    verifier.verify()
                    LOG.info(
                        _LI('Image signature verification succeeded '
                            'for image %s'), image_id)
            except cryptography.exceptions.InvalidSignature:
                data.truncate(0)
                with excutils.save_and_reraise_exception():
                    LOG.error(
                        _LE('Image signature verification failed '
                            'for image: %s'), image_id)
            except Exception as ex:
                with excutils.save_and_reraise_exception():
                    LOG.error(_LE("Error writing to %(path)s: %(exception)s"),
                              {
                                  'path': dst_path,
                                  'exception': ex
                              })
            finally:
                if close_file:
                    # Ensure that the data is pushed all the way down to
                    # persistent storage. This ensures that in the event of a
                    # subsequent host crash we don't have running instances
                    # using a corrupt backing file.
                    data.flush()
                    os.fsync(data.fileno())
                    data.close()
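
The streaming verification above follows one pattern: feed every chunk to the verifier before writing it, call verify() after the last chunk, and discard the partial output on failure. A minimal sketch of that pattern; write_verified() is a hypothetical helper and the verifier is assumed to expose update()/verify() as in the code above:

def write_verified(chunks, out_file, verifier=None):
    # Hypothetical helper mirroring the pattern above: the verifier (if
    # any) sees each chunk before it is written, verify() runs after the
    # final chunk, and the partial output is truncated on any failure
    # before the exception is re-raised.
    try:
        for chunk in chunks:
            if verifier:
                verifier.update(chunk)
            out_file.write(chunk)
        if verifier:
            verifier.verify()
    except Exception:
        out_file.truncate(0)
        raise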
Example #46
0
    def _delete_inventory(self, rp_uuid):
        """Deletes all inventory records for a resource provider with the
        supplied UUID.
        """
        curr = self._get_inventory_and_update_provider_generation(rp_uuid)

        # Check to see if we need to update placement's view
        if not curr.get('inventories', {}):
            msg = "No inventory to delete from resource provider %s."
            LOG.debug(msg, rp_uuid)
            return

        msg = _LI("Compute node %s reported no inventory but previous "
                  "inventory was detected. Deleting existing inventory "
                  "records.")
        LOG.info(msg, rp_uuid)

        url = '/resource_providers/%s/inventories' % rp_uuid
        cur_rp_gen = self._resource_providers[rp_uuid]['generation']
        payload = {
            'resource_provider_generation': cur_rp_gen,
            'inventories': {},
        }
        r = self.put(url, payload)
        placement_req_id = get_placement_request_id(r)
        if r.status_code == 200:
            # Update our view of the generation for next time
            updated_inv = r.json()
            new_gen = updated_inv['resource_provider_generation']

            self._resource_providers[rp_uuid]['generation'] = new_gen
            msg_args = {
                'rp_uuid': rp_uuid,
                'generation': new_gen,
                'placement_req_id': placement_req_id,
            }
            LOG.info(
                _LI('[%(placement_req_id)s] Deleted all inventory for '
                    'resource provider %(rp_uuid)s at generation '
                    '%(generation)i'), msg_args)
            return
        elif r.status_code == 409:
            rc_str = _extract_inventory_in_use(r.text)
            if rc_str is not None:
                msg = _LW("[%(placement_req_id)s] We cannot delete inventory "
                          "%(rc_str)s for resource provider %(rp_uuid)s "
                          "because the inventory is in use.")
                msg_args = {
                    'rp_uuid': rp_uuid,
                    'rc_str': rc_str,
                    'placement_req_id': placement_req_id,
                }
                LOG.warning(msg, msg_args)
                return

        msg = _LE("[%(placement_req_id)s] Failed to delete inventory for "
                  "resource provider %(rp_uuid)s. Got error response: %(err)s")
        msg_args = {
            'rp_uuid': rp_uuid,
            'err': r.text,
            'placement_req_id': placement_req_id,
        }
        LOG.error(msg, msg_args)
    def _handle_base_image(self, img_id, base_file):
        """Handle the checks for a single base image."""

        image_bad = False
        image_in_use = False

        LOG.info(_LI('image %(id)s at (%(base_file)s): checking'),
                 {'id': img_id,
                  'base_file': base_file})

        if base_file in self.unexplained_images:
            self.unexplained_images.remove(base_file)

        if (base_file and os.path.exists(base_file)
                and os.path.isfile(base_file)):
            # _verify_checksum returns True if the checksum is ok, and None if
            # there is no checksum file
            checksum_result = self._verify_checksum(img_id, base_file)
            if checksum_result is not None:
                image_bad = not checksum_result

            # Give other threads a chance to run
            time.sleep(0)

        instances = []
        if img_id in self.used_images:
            local, remote, instances = self.used_images[img_id]

            if local > 0 or remote > 0:
                image_in_use = True
                LOG.info(_LI('image %(id)s at (%(base_file)s): '
                             'in use: on this node %(local)d local, '
                             '%(remote)d on other nodes sharing this instance '
                             'storage'),
                         {'id': img_id,
                          'base_file': base_file,
                          'local': local,
                          'remote': remote})

                self.active_base_files.append(base_file)

                if not base_file:
                    LOG.warning(_LW('image %(id)s at (%(base_file)s): '
                                    'warning -- an absent base file is in '
                                    'use! instances: %(instance_list)s'),
                                {'id': img_id,
                                 'base_file': base_file,
                                 'instance_list': ' '.join(instances)})

        if image_bad:
            self.corrupt_base_files.append(base_file)

        if base_file:
            if not image_in_use:
                LOG.debug('image %(id)s at (%(base_file)s): image is not in '
                          'use',
                          {'id': img_id,
                           'base_file': base_file})
                self.removable_base_files.append(base_file)

            else:
                LOG.debug('image %(id)s at (%(base_file)s): image is in '
                          'use',
                          {'id': img_id,
                           'base_file': base_file})
                if os.path.exists(base_file):
                    libvirt_utils.chown(base_file, os.getuid())
                    os.utime(base_file, None)
Example #48
0
    def execute(self, *cmd, **kwargs):
        # NOTE(dims): This method provides compatibility with the
        # processutils.execute interface, so that calls through the daemon
        # or direct rootwrap honor the same set of flags in kwargs and we
        # don't regress any current behavior.
        cmd = [str(c) for c in cmd]
        loglevel = kwargs.pop('loglevel', logging.DEBUG)
        log_errors = kwargs.pop('log_errors', None)
        process_input = kwargs.pop('process_input', None)
        delay_on_retry = kwargs.pop('delay_on_retry', True)
        attempts = kwargs.pop('attempts', 1)
        check_exit_code = kwargs.pop('check_exit_code', [0])
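        # check_exit_code may arrive as a bool (False disables the check),
        # an int (a single allowed exit code) or a list of allowed codes;
        # normalize it to a list and record whether to ignore exit codes.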
        ignore_exit_code = False
        if isinstance(check_exit_code, bool):
            ignore_exit_code = not check_exit_code
            check_exit_code = [0]
        elif isinstance(check_exit_code, int):
            check_exit_code = [check_exit_code]

        sanitized_cmd = strutils.mask_password(' '.join(cmd))
        LOG.info(
            _LI('Executing RootwrapDaemonHelper.execute '
                'cmd=[%(cmd)r] kwargs=[%(kwargs)r]'), {
                    'cmd': sanitized_cmd,
                    'kwargs': kwargs
                })

        while attempts > 0:
            attempts -= 1
            try:
                start_time = time.time()
                LOG.log(loglevel, _('Running cmd (subprocess): %s'),
                        sanitized_cmd)

                (returncode, out,
                 err) = self.client.execute(cmd, process_input)

                end_time = time.time() - start_time
                LOG.log(
                    loglevel,
                    'CMD "%(sanitized_cmd)s" returned: %(return_code)s '
                    'in %(end_time)0.3fs', {
                        'sanitized_cmd': sanitized_cmd,
                        'return_code': returncode,
                        'end_time': end_time
                    })

                if not ignore_exit_code and returncode not in check_exit_code:
                    out = strutils.mask_password(out)
                    err = strutils.mask_password(err)
                    raise processutils.ProcessExecutionError(
                        exit_code=returncode,
                        stdout=out,
                        stderr=err,
                        cmd=sanitized_cmd)
                return (out, err)

            except processutils.ProcessExecutionError as err:
                # Log if we want to always log the errors, or if this is
                # the final attempt that failed and we want to log that.
                if log_errors == processutils.LOG_ALL_ERRORS or (
                        log_errors == processutils.LOG_FINAL_ERROR
                        and not attempts):
                    format = _('%(desc)r\ncommand: %(cmd)r\n'
                               'exit code: %(code)r\nstdout: %(stdout)r\n'
                               'stderr: %(stderr)r')
                    LOG.log(
                        loglevel, format, {
                            "desc": err.description,
                            "cmd": err.cmd,
                            "code": err.exit_code,
                            "stdout": err.stdout,
                            "stderr": err.stderr
                        })
                if not attempts:
                    LOG.log(loglevel, _('%r failed. Not Retrying.'),
                            sanitized_cmd)
                    raise
                else:
                    LOG.log(loglevel, _('%r failed. Retrying.'), sanitized_cmd)
                    if delay_on_retry:
                        time.sleep(random.randint(20, 200) / 100.0)
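
The retry handling above (attempts, delay_on_retry, re-raise on the final failure) is a general pattern; a minimal standalone sketch, where run_with_retries() and its callable argument are illustrative assumptions:

import random
import time


def run_with_retries(fn, attempts=1, delay_on_retry=True):
    # Hypothetical distillation of the loop above: earlier failures are
    # retried, optionally after a short random delay, and the final
    # failure propagates to the caller.
    while attempts > 0:
        attempts -= 1
        try:
            return fn()
        except Exception:
            if not attempts:
                raise
            if delay_on_retry:
                time.sleep(random.randint(20, 200) / 100.0)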
Example #49
0
    def _update_available_resource(self, context, resources):

        # initialise the compute node object, creating it
        # if it does not already exist.
        self._init_compute_node(context, resources)

        # if we could not init the compute node the tracker will be
        # disabled and we should quit now
        if self.disabled:
            return

        if 'pci_passthrough_devices' in resources:
            devs = []
            for dev in jsonutils.loads(
                    resources.pop('pci_passthrough_devices')):
                if dev['dev_type'] == 'type-PF':
                    continue

                if self.pci_filter.device_assignable(dev):
                    devs.append(dev)

            if not self.pci_tracker:
                n_id = self.compute_node['id'] if self.compute_node else None
                self.pci_tracker = pci_manager.PciDevTracker(context,
                                                             node_id=n_id)
            self.pci_tracker.set_hvdevs(devs)

        # Grab all instances assigned to this node:
        instances = objects.InstanceList.get_by_host_and_node(
            context,
            self.host,
            self.nodename,
            expected_attrs=['system_metadata', 'numa_topology'])

        # Now calculate usage based on instance utilization:
        self._update_usage_from_instances(context, instances)

        # Grab all in-progress migrations:
        migrations = objects.MigrationList.get_in_progress_by_host_and_node(
            context, self.host, self.nodename)

        self._update_usage_from_migrations(context, migrations)

        # Detect and account for orphaned instances that may exist on the
        # hypervisor, but are not in the DB:
        orphans = self._find_orphaned_instances()
        self._update_usage_from_orphans(orphans)

        # NOTE(yjiang5): Because pci device tracker status is not cleared in
        # this periodic task, and also because the resource tracker is not
        # notified when instances are deleted, we need to remove all usages
        # from deleted instances.
        if self.pci_tracker:
            self.pci_tracker.clean_usage(instances, migrations, orphans)
            self.compute_node['pci_device_pools'] = self.pci_tracker.stats
        else:
            self.compute_node['pci_device_pools'] = []

        self._report_final_resource_view()

        metrics = self._get_host_metrics(context, self.nodename)
        self.compute_node['metrics'] = jsonutils.dumps(metrics)

        # TODO(sbauza): Juno compute nodes are missing the host field and
        # the Juno ResourceTracker does not set this field, even if
        # the ComputeNode object can show it.
        # Unfortunately, as we're not yet using ComputeNode.save(), we need
        # to add this field in the resources dict until the RT is using
        # the ComputeNode.save() method for populating the table.
        # tl;dr: To be removed once RT is using ComputeNode.save()
        self.compute_node['host'] = self.host

        self._update(context)
        LOG.info(_LI('Compute_service record updated for %(host)s:%(node)s'), {
            'host': self.host,
            'node': self.nodename
        })
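A rough standalone sketch (hypothetical helper name, not from the source) of the PCI whitelisting step above: physical functions (type-PF) are skipped, and the remaining devices are kept only if the configured PCI filter reports them as assignable.

def filter_assignable_pci_devs(devs, pci_filter):
    # Sketch only: mirrors the device loop in _update_available_resource.
    return [dev for dev in devs
            if dev.get('dev_type') != 'type-PF'
            and pci_filter.device_assignable(dev)]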
Example #50
0
    def _update_inventory_attempt(self, rp_uuid, inv_data):
        """Update the inventory for this resource provider if needed.

        :param rp_uuid: The resource provider UUID for the operation
        :param inv_data: The new inventory for the resource provider
        :returns: True if the inventory was updated (or did not need to be),
                  False otherwise.
        """
        curr = self._get_inventory_and_update_provider_generation(rp_uuid)

        # Check to see if we need to update placement's view
        if inv_data == curr.get('inventories', {}):
            return True

        cur_rp_gen = self._resource_providers[rp_uuid]['generation']
        payload = {
            'resource_provider_generation': cur_rp_gen,
            'inventories': inv_data,
        }
        url = '/resource_providers/%s/inventories' % rp_uuid
        result = self.put(url, payload)
        if result.status_code == 409:
            LOG.info(
                _LI('[%(placement_req_id)s] Inventory update conflict '
                    'for %(resource_provider_uuid)s with generation ID '
                    '%(generation_id)s'), {
                        'placement_req_id': get_placement_request_id(result),
                        'resource_provider_uuid': rp_uuid,
                        'generation_id': cur_rp_gen
                    })
            # NOTE(jaypipes): There may be cases when we try to set a
            # provider's inventory that results in attempting to delete an
            # inventory record for a resource class that has an active
            # allocation. We need to catch this particular case and raise an
            # exception here instead of returning False, since we should not
            # re-try the operation in this case.
            #
            # A use case for where this can occur is the following:
            #
            # 1) Provider created for each Ironic baremetal node in Newton
            # 2) Inventory records for baremetal node created for VCPU,
            #    MEMORY_MB and DISK_GB
            # 3) A Nova instance consumes the baremetal node and allocation
            #    records are created for VCPU, MEMORY_MB and DISK_GB matching
            #    the total amount of those resources on the baremetal node.
            # 4) Upgrade to Ocata and now resource tracker wants to set the
            #    provider's inventory to a single record of resource class
            #    CUSTOM_IRON_SILVER (or whatever the Ironic node's
            #    "resource_class" attribute is)
            # 5) Scheduler report client sends the inventory list containing a
            #    single CUSTOM_IRON_SILVER record and placement service
            #    attempts to delete the inventory records for VCPU, MEMORY_MB
            #    and DISK_GB. An exception is raised from the placement service
            #    because allocation records exist for those resource classes,
            #    and a 409 Conflict is returned to the compute node. We need to
            #    trigger a delete of the old allocation records and then set
            #    the new inventory, and then set the allocation record to the
            #    new CUSTOM_IRON_SILVER record.
            match = _RE_INV_IN_USE.search(result.text)
            if match:
                rc = match.group(1)
                raise exception.InventoryInUse(
                    resource_classes=rc,
                    resource_provider=rp_uuid,
                )

            # Invalidate our cache and re-fetch the resource provider
            # to be sure to get the latest generation.
            del self._resource_providers[rp_uuid]
            # NOTE(jaypipes): We don't need to pass a name parameter to
            # _ensure_resource_provider() because we know the resource provider
            # record already exists. We're just reloading the record here.
            self._ensure_resource_provider(rp_uuid)
            return False
        elif not result:
            placement_req_id = get_placement_request_id(result)
            LOG.warning(
                _LW('[%(placement_req_id)s] Failed to update '
                    'inventory for resource provider '
                    '%(uuid)s: %(status)i %(text)s'), {
                        'placement_req_id': placement_req_id,
                        'uuid': rp_uuid,
                        'status': result.status_code,
                        'text': result.text
                    })
            # log the body at debug level
            LOG.debug(
                '[%(placement_req_id)s] Failed inventory update request '
                'for resource provider %(uuid)s with body: %(payload)s', {
                    'placement_req_id': placement_req_id,
                    'uuid': rp_uuid,
                    'payload': payload
                })
            return False

        if result.status_code != 200:
            placement_req_id = get_placement_request_id(result)
            LOG.info(
                _LI('[%(placement_req_id)s] Received unexpected response code '
                    '%(code)i while trying to update inventory for resource '
                    'provider %(uuid)s: %(text)s'), {
                        'placement_req_id': placement_req_id,
                        'uuid': rp_uuid,
                        'code': result.status_code,
                        'text': result.text
                    })
            return False

        # Update our view of the generation for next time
        updated_inventories_result = result.json()
        new_gen = updated_inventories_result['resource_provider_generation']

        self._resource_providers[rp_uuid]['generation'] = new_gen
        LOG.debug('Updated inventory for %s at generation %i', rp_uuid,
                  new_gen)
        return True
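Since a False return here signals a stale provider generation that has just been refreshed, a caller can simply retry a bounded number of times. A minimal sketch (hypothetical wrapper, not from the source):

def update_inventory_with_retries(client, rp_uuid, inv_data, max_attempts=3):
    # Sketch only: retry while _update_inventory_attempt reports a conflict.
    for _ in range(max_attempts):
        if client._update_inventory_attempt(rp_uuid, inv_data):
            return True
    return False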
Example #51
0
    def __init__(self, init_only=None):
        # TODO(cyeoh): bp v3-api-extension-framework. Currently load
        # all extensions but eventually should be able to exclude
        # based on a config file
        def _check_load_extension(ext):
            if (self.init_only is None or ext.obj.alias in
                self.init_only) and isinstance(ext.obj,
                                               extensions.V21APIExtensionBase):

                # Check whitelist is either empty or if not then the extension
                # is in the whitelist
                if (not CONF.osapi_v21.extensions_whitelist or
                        ext.obj.alias in CONF.osapi_v21.extensions_whitelist):

                    # Check the extension is not in the blacklist
                    blacklist = CONF.osapi_v21.extensions_blacklist
                    if ext.obj.alias not in blacklist:
                        return self._register_extension(ext)
            return False

        if (CONF.osapi_v21.extensions_blacklist or
                CONF.osapi_v21.extensions_whitelist):
            LOG.warning(
                _LW('In the M release you must run all of the API. '
                    'The concept of API extensions will be removed from '
                    'the codebase to ensure there is a single Compute API.'))

        self.init_only = init_only
        LOG.debug("v21 API Extension Blacklist: %s",
                  CONF.osapi_v21.extensions_blacklist)
        LOG.debug("v21 API Extension Whitelist: %s",
                  CONF.osapi_v21.extensions_whitelist)

        in_blacklist_and_whitelist = set(
            CONF.osapi_v21.extensions_whitelist).intersection(
                CONF.osapi_v21.extensions_blacklist)
        if len(in_blacklist_and_whitelist) != 0:
            LOG.warning(_LW("Extensions in both blacklist and whitelist: %s"),
                        list(in_blacklist_and_whitelist))

        self.api_extension_manager = stevedore.enabled.EnabledExtensionManager(
            namespace=self.api_extension_namespace(),
            check_func=_check_load_extension,
            invoke_on_load=True,
            invoke_kwds={"extension_info": self.loaded_extension_info})

        mapper = ProjectMapper()

        self.resources = {}

        # NOTE(cyeoh) Core API support is rewritten as extensions
        # but conceptually we still have a core API
        if list(self.api_extension_manager):
            # NOTE(cyeoh): Stevedore raises an exception if there are
            # no plugins detected. I wonder if this is a bug.
            self._register_resources_check_inherits(mapper)
            self.api_extension_manager.map(self._register_controllers)

        missing_core_extensions = self.get_missing_core_extensions(
            self.loaded_extension_info.get_extensions().keys())
        if not self.init_only and missing_core_extensions:
            LOG.critical(_LC("Missing core API extensions: %s"),
                         missing_core_extensions)
            raise exception.CoreAPIMissing(
                missing_apis=missing_core_extensions)

        LOG.info(_LI("Loaded extensions: %s"),
                 sorted(self.loaded_extension_info.get_extensions().keys()))
        super(APIRouterV21, self).__init__(mapper)
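The whitelist/blacklist decision in _check_load_extension reduces to a small predicate; a standalone sketch with hypothetical names (it omits the V21APIExtensionBase and init_only checks):

def should_load(alias, whitelist, blacklist):
    # Sketch only: load if the whitelist is empty or contains the alias,
    # and the alias is not blacklisted.
    if whitelist and alias not in whitelist:
        return False
    return alias not in blacklist

assert should_load('os-keypairs', [], []) is True
assert should_load('os-keypairs', [], ['os-keypairs']) is False
assert should_load('os-keypairs', ['other'], []) is False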
Example #52
0
    def remove_from_instance(self, context, instance, security_group_name):
        """Remove the security group associated with the instance."""
        neutron = neutronapi.get_client(context)
        try:
            security_group_id = neutronv20.find_resourceid_by_name_or_id(
                neutron, 'security_group', security_group_name,
                context.project_id)
        except n_exc.NeutronClientException as e:
            exc_info = sys.exc_info()
            if e.status_code == 404:
                msg = (_("Security group %(name)s is not found for "
                         "project %(project)s") % {
                             'name': security_group_name,
                             'project': context.project_id
                         })
                self.raise_not_found(msg)
            else:
                LOG.exception(_LE("Neutron Error:"))
                raise exc_info[0], exc_info[1], exc_info[2]
        params = {'device_id': instance.uuid}
        try:
            ports = neutron.list_ports(**params).get('ports')
        except n_exc.NeutronClientException:
            with excutils.save_and_reraise_exception():
                LOG.exception(_LE("Neutron Error:"))

        if not ports:
            msg = (_("instance_id %s could not be found as device id on"
                     " any ports") % instance.uuid)
            self.raise_not_found(msg)

        found_security_group = False
        for port in ports:
            try:
                port.get('security_groups', []).remove(security_group_id)
            except ValueError:
                # When removing a security group from an instance, the
                # security group should be present on every port, since that
                # is how it was added through the nova api. If it is missing
                # from this port just continue; a 404 is raised only if the
                # security group is not found on any of the instance's ports.
                continue

            updated_port = {'security_groups': port['security_groups']}
            try:
                LOG.info(
                    _LI("Removing security group %(security_group_id)s from "
                        "port %(port_id)s"), {
                            'security_group_id': security_group_id,
                            'port_id': port['id']
                        })
                neutron.update_port(port['id'], {'port': updated_port})
                found_security_group = True
            except Exception:
                with excutils.save_and_reraise_exception():
                    LOG.exception(_LE("Neutron Error:"))
        if not found_security_group:
            msg = (_("Security group %(security_group_name)s not associated "
                     "with the instance %(instance)s") % {
                         'security_group_name': security_group_name,
                         'instance': instance.uuid
                     })
            self.raise_not_found(msg)
Example #53
0
    def _update_usage_from_migration(self, context, instance, image_meta,
                                     migration):
        """Update usage for a single migration.  The record may
        represent an incoming or outbound migration.
        """
        uuid = migration.instance_uuid
        LOG.info(_LI("Updating from migration %s") % uuid)

        incoming = (migration.dest_compute == self.host
                    and migration.dest_node == self.nodename)
        outbound = (migration.source_compute == self.host
                    and migration.source_node == self.nodename)
        same_node = (incoming and outbound)

        record = self.tracked_instances.get(uuid, None)
        itype = None

        if same_node:
            # same node resize. record usage for whichever instance type the
            # instance is *not* in:
            if (instance['instance_type_id'] ==
                    migration.old_instance_type_id):
                itype = self._get_instance_type(context, instance, 'new_',
                                                migration.new_instance_type_id)
            else:
                # instance record already has new flavor, hold space for a
                # possible revert to the old instance type:
                itype = self._get_instance_type(context, instance, 'old_',
                                                migration.old_instance_type_id)

        elif incoming and not record:
            # instance has not yet migrated here:
            itype = self._get_instance_type(context, instance, 'new_',
                                            migration.new_instance_type_id)

        elif outbound and not record:
            # instance migrated, but record usage for a possible revert:
            itype = self._get_instance_type(context, instance, 'old_',
                                            migration.old_instance_type_id)

        if image_meta is None:
            image_meta = utils.get_image_from_system_metadata(
                instance['system_metadata'])

        if itype:
            host_topology = self.compute_node.get('numa_topology')
            if host_topology:
                host_topology = objects.NUMATopology.obj_from_db_obj(
                    host_topology)
            numa_topology = hardware.numa_get_constraints(itype, image_meta)
            numa_topology = (hardware.numa_fit_instance_to_host(
                host_topology, numa_topology))
            usage = self._get_usage_dict(itype, numa_topology=numa_topology)
            if self.pci_tracker:
                self.pci_tracker.update_pci_for_migration(context, instance)
            self._update_usage(usage)
            if self.pci_tracker:
                self.compute_node['pci_device_pools'] = self.pci_tracker.stats
            else:
                self.compute_node['pci_device_pools'] = []
            self.tracked_migrations[uuid] = (migration, itype)
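A standalone sketch (hypothetical helper) of the direction check used above: a migration is incoming when this host/node is the destination, outbound when it is the source, and a same-node resize when it is both.

def classify_migration(migration, host, nodename):
    # Sketch only: returns (incoming, outbound, same_node).
    incoming = (migration.dest_compute == host
                and migration.dest_node == nodename)
    outbound = (migration.source_compute == host
                and migration.source_node == nodename)
    return incoming, outbound, incoming and outbound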
Example #54
0
        def discon_vol_for_vio(vios_w):
            """Removes the volume from a specific Virtual I/O Server.

            :param vios_w: The VIOS wrapper.
            :return: True if a remove action was done against this VIOS.  False
                     otherwise.
            """
            LOG.debug("Disconnect volume %(vol)s from vios uuid %(uuid)s",
                      dict(vol=self.volume_id, uuid=vios_w.uuid))
            udid, device_name = None, None
            try:
                udid = self._get_udid()
                if not udid:
                    # We lost our bdm data. We'll need to discover it.
                    status, device_name, udid = self._discover_volume_on_vios(
                        vios_w, self.volume_id)

                if udid and not device_name:
                    device_name = vios_w.hdisk_from_uuid(udid)

                if not device_name:
                    LOG.warning(
                        _LW("Disconnect Volume: No mapped device found on "
                            "Virtual I/O Server %(vios)s for volume "
                            "%(volume_id)s.  Volume UDID: %(volume_uid)s"), {
                                'volume_uid': udid,
                                'volume_id': self.volume_id,
                                'vios': vios_w.name
                            })
                    return False

            except Exception as e:
                LOG.warning(
                    _LW("Disconnect Volume: Failed to find disk on Virtual "
                        "I/O Server %(vios_name)s for volume %(volume_id)s. "
                        "Volume UDID: %(volume_uid)s.  Error: %(error)s"), {
                            'error': e,
                            'volume_uid': udid,
                            'vios_name': vios_w.name,
                            'volume_id': self.volume_id
                        })
                return False

            # We have found the device name
            LOG.info(
                _LI("Disconnect Volume: Discovered the device %(hdisk)s "
                    "on Virtual I/O Server %(vios_name)s for volume "
                    "%(volume_id)s.  Volume UDID: %(volume_uid)s."), {
                        'volume_uid': udid,
                        'volume_id': self.volume_id,
                        'vios_name': vios_w.name,
                        'hdisk': device_name
                    })

            # Add the action to remove the mapping when the stg_ftsk is run.
            partition_id = vm.get_vm_id(self.adapter, self.vm_uuid)

            with lockutils.lock(hash(self)):
                self._add_remove_mapping(partition_id, vios_w.uuid,
                                         device_name)

                # Add a step after the mapping removal to also remove the
                # hdisk.
                self._add_remove_hdisk(vios_w, device_name)

            # Found a valid element to remove
            return True
Example #55
0
    def _init_compute_node(self, context, resources):
        """Initialise the compute node if it does not already exist.

        The resource tracker will be inoperable if compute_node
        is not defined. The compute_node will remain undefined if
        we fail to create it or if there is no associated service
        registered.

        If this method has to create a compute node it needs initial
        values - these come from resources.

        :param context: security context
        :param resources: initial values
        """

        # if there is already a compute node just use resources
        # to initialize
        if self.compute_node:
            self._copy_resources(resources)
            return

        # TODO(pmurray): this lookup should be removed when the service_id
        # field in the compute node goes away. At the moment it is deprecated
        # but still a required field, so it has to be assigned below.
        service = self._get_service(context)
        if not service:
            # no service record, disable resource
            return

        # now try to get the compute node record from the
        # database. If we get one we use resources to initialize
        self.compute_node = self._get_compute_node(context)
        if self.compute_node:
            self._copy_resources(resources)
            return

        # there was no local copy and none in the database
        # so we need to create a new compute node. This needs
        # initial values for the database.
        #
        # TODO(pmurray) this section will be cleaned up when we
        # use the ComputeNode object. Here it is the conductor call
        # to compute_node_create() that sets up the compute_node
        # dict. That will change to create the compute_node, initialize
        # it and then save.
        cn = {}
        cn.update(resources)
        # TODO(pmurray) service_id is deprecated but is still a required field.
        # This should be removed when the field is changed.
        cn['service_id'] = service.id
        cn['host'] = self.host
        # initialize load stats from existing instances:
        self._write_ext_resources(cn)
        # NOTE(pmurray): the stats field is stored as a json string. The
        # json conversion will be done automatically by the ComputeNode object
        # so this can be removed when using ComputeNode.
        cn['stats'] = jsonutils.dumps(cn['stats'])
        # pci_passthrough_devices may be in resources but are not
        # stored in compute nodes
        cn.pop('pci_passthrough_devices', None)

        self.compute_node = self.conductor_api.compute_node_create(context, cn)
        LOG.info(
            _LI('Compute_service record created for '
                '%(host)s:%(node)s'), {
                    'host': self.host,
                    'node': self.nodename
                })

        # now we have created a compute node we can copy resources
        # NOTE(pmurray): this has an unnecessary copy until the above
        # is cleaned up.
        self._copy_resources(resources)
Example #56
0
    def create(self, req, body):
        """Creates a new instance event."""
        context = req.environ['nova.context']
        authorize(context, action='create')

        response_events = []
        accepted_events = []
        accepted_instances = set()
        instances = {}
        result = 200

        body_events = body['events']

        for _event in body_events:
            client_event = dict(_event)
            event = objects.InstanceExternalEvent(context)

            event.instance_uuid = client_event.pop('server_uuid')
            event.name = client_event.pop('name')
            event.status = client_event.pop('status', 'completed')
            event.tag = client_event.pop('tag', None)

            instance = instances.get(event.instance_uuid)
            if not instance:
                try:
                    instance = objects.Instance.get_by_uuid(
                        context, event.instance_uuid)
                    instances[event.instance_uuid] = instance
                except exception.InstanceNotFound:
                    LOG.debug(
                        'Dropping event %(name)s:%(tag)s for unknown '
                        'instance %(instance_uuid)s', dict(event.iteritems()))
                    _event['status'] = 'failed'
                    _event['code'] = 404
                    result = 207

            # NOTE: before accepting the event, make sure the instance
            # for which the event is sent is assigned to a host; otherwise
            # it will not be possible to dispatch the event
            if instance:
                if instance.host:
                    accepted_events.append(event)
                    accepted_instances.add(instance)
                    LOG.info(
                        _LI('Creating event %(name)s:%(tag)s for '
                            'instance %(instance_uuid)s'),
                        dict(event.iteritems()))
                    # NOTE: as the event is processed asynchronously verify
                    # whether 202 is a more suitable response code than 200
                    _event['status'] = 'completed'
                    _event['code'] = 200
                else:
                    LOG.debug(
                        "Unable to find a host for instance "
                        "%(instance)s. Dropping event %(event)s", {
                            'instance': event.instance_uuid,
                            'event': event.name
                        })
                    _event['status'] = 'failed'
                    _event['code'] = 422
                    result = 207

            response_events.append(_event)

        if accepted_events:
            self.compute_api.external_instance_event(context,
                                                     accepted_instances,
                                                     accepted_events)
        else:
            msg = _('No instances found for any event')
            raise webob.exc.HTTPNotFound(explanation=msg)

        # FIXME(cyeoh): This needs some infrastructure support so that
        # we have a general way to do this
        robj = wsgi.ResponseObject({'events': response_events})
        robj._code = result
        return robj
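For illustration only (all values are placeholders), the handler above expects a body shaped roughly as follows; 'status' and 'tag' are optional and default to 'completed' and None:

body = {
    'events': [
        {
            'server_uuid': '00000000-0000-0000-0000-000000000000',
            'name': 'network-vif-plugged',
            'status': 'completed',  # optional, defaults to 'completed'
            'tag': 'port-id',       # optional, defaults to None
        },
    ],
}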
Example #57
0
    def _update_inventory_attempt(self, compute_node):
        """Update the inventory for this compute node if needed.

        :param compute_node: The objects.ComputeNode for the operation
        :returns: True if the inventory was updated (or did not need to be),
                  False otherwise.
        """
        inv_data = _compute_node_to_inventory_dict(compute_node)
        curr = self._get_inventory(compute_node)

        # Update our generation immediately, if possible. Even if there
        # are no inventories we should always have a generation but let's
        # be careful.
        server_gen = curr.get('resource_provider_generation')
        if server_gen:
            my_rp = self._resource_providers[compute_node.uuid]
            if server_gen != my_rp.generation:
                LOG.debug(
                    'Updating our resource provider generation '
                    'from %(old)i to %(new)i', {
                        'old': my_rp.generation,
                        'new': server_gen
                    })
            my_rp.generation = server_gen

        # Check to see if we need to update placement's view
        if inv_data == curr.get('inventories', {}):
            return True

        cur_rp_gen = self._resource_providers[compute_node.uuid].generation
        payload = {
            'resource_provider_generation': cur_rp_gen,
            'inventories': inv_data,
        }
        url = '/resource_providers/%s/inventories' % compute_node.uuid
        result = self.put(url, payload)
        if result.status_code == 409:
            LOG.info(_LI('Inventory update conflict for %s'),
                     compute_node.uuid)
            # Invalidate our cache and re-fetch the resource provider
            # to be sure to get the latest generation.
            del self._resource_providers[compute_node.uuid]
            self._ensure_resource_provider(compute_node.uuid,
                                           compute_node.hypervisor_hostname)
            return False
        elif not result:
            LOG.warning(
                _LW('Failed to update inventory for '
                    '%(uuid)s: %(status)i %(text)s'), {
                        'uuid': compute_node.uuid,
                        'status': result.status_code,
                        'text': result.text
                    })
            return False

        if result.status_code != 200:
            LOG.info(
                _LI('Received unexpected response code %(code)i while '
                    'trying to update inventory for compute node %(uuid)s'
                    ': %(text)s'), {
                        'uuid': compute_node.uuid,
                        'code': result.status_code,
                        'text': result.text
                    })
            return False

        # Update our view of the generation for next time
        updated_inventories_result = result.json()
        new_gen = updated_inventories_result['resource_provider_generation']

        self._resource_providers[compute_node.uuid].generation = new_gen
        LOG.debug('Updated inventory for %s at generation %i',
                  compute_node.uuid, new_gen)
        return True
Example #58
0
    def start(self):
        verstr = version.version_string_with_package()
        LOG.info(_LI('Starting %(topic)s node (version %(version)s)'), {
            'topic': self.topic,
            'version': verstr
        })
        self.basic_config_check()
        self.manager.init_host()
        self.model_disconnected = False
        ctxt = context.get_admin_context()
        self.service_ref = objects.Service.get_by_host_and_binary(
            ctxt, self.host, self.binary)
        if self.service_ref:
            _update_service_ref(self.service_ref)

        else:
            try:
                self.service_ref = _create_service_ref(self, ctxt)
            except (exception.ServiceTopicExists,
                    exception.ServiceBinaryExists):
                # NOTE(danms): If we race to create a record with a sibling
                # worker, don't fail here.
                self.service_ref = objects.Service.get_by_host_and_binary(
                    ctxt, self.host, self.binary)

        self.manager.pre_start_hook()

        if self.backdoor_port is not None:
            self.manager.backdoor_port = self.backdoor_port

        LOG.debug("Creating RPC server for service %s", self.topic)

        target = messaging.Target(topic=self.topic, server=self.host)

        endpoints = [
            self.manager,
            baserpc.BaseRPCAPI(self.manager.service_name, self.backdoor_port)
        ]
        endpoints.extend(self.manager.additional_endpoints)

        serializer = objects_base.NovaObjectSerializer()

        self.rpcserver = rpc.get_server(target, endpoints, serializer)
        self.rpcserver.start()

        self.manager.post_start_hook()

        LOG.debug("Join ServiceGroup membership for this service %s",
                  self.topic)
        # Add service to the ServiceGroup membership group.
        self.servicegroup_api.join(self.host, self.topic, self)

        if self.periodic_enable:
            if self.periodic_fuzzy_delay:
                initial_delay = random.randint(0, self.periodic_fuzzy_delay)
            else:
                initial_delay = None

            self.tg.add_dynamic_timer(
                self.periodic_tasks,
                initial_delay=initial_delay,
                periodic_interval_max=self.periodic_interval_max)
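A tiny sketch (hypothetical helper, not from the source) of the fuzzy-delay choice above: periodic tasks start after a random delay in [0, periodic_fuzzy_delay] seconds, or immediately when the fuzz is disabled.

import random

def pick_initial_delay(periodic_fuzzy_delay):
    # Sketch only: None means "no initial delay".
    if periodic_fuzzy_delay:
        return random.randint(0, periodic_fuzzy_delay)
    return None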
Example #59
0
    def reset(self):
        LOG.info(_LI('Reloading compute RPC API'))
        compute_rpcapi.LAST_VERSION = None
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
Example #60
0
    def start(self):
        verstr = version.version_string_with_package()
        LOG.info(_LI('Starting %(topic)s node (version %(version)s)'),
                  {'topic': self.topic, 'version': verstr})
        self.basic_config_check()
        self.manager.init_host()
        self.model_disconnected = False
        ctxt = context.get_admin_context()
        self.service_ref = objects.Service.get_by_host_and_binary(
            ctxt, self.host, self.binary)
        if self.service_ref:
            _update_service_ref(self.service_ref)

        else:
            try:
                self.service_ref = _create_service_ref(self, ctxt)
            except (exception.ServiceTopicExists,
                    exception.ServiceBinaryExists):
                # NOTE(danms): If we race to create a record with a sibling
                # worker, don't fail here.
                self.service_ref = objects.Service.get_by_host_and_binary(
                    ctxt, self.host, self.binary)

        self.manager.pre_start_hook()

        if self.backdoor_port is not None:
            self.manager.backdoor_port = self.backdoor_port

        LOG.debug("Creating RPC server for service %s", self.topic)

        target = messaging.Target(topic=self.topic, server=self.host)

        endpoints = [
            self.manager,
            baserpc.BaseRPCAPI(self.manager.service_name, self.backdoor_port)
        ]
        endpoints.extend(self.manager.additional_endpoints)

        serializer = objects_base.NovaObjectSerializer()

        self.rpcserver = rpc.get_server(target, endpoints, serializer)
        self.rpcserver.start()

        self.manager.post_start_hook()

        LOG.debug("Join ServiceGroup membership for this service %s",
                  self.topic)
        # Add service to the ServiceGroup membership group.
        self.servicegroup_api.join(self.host, self.topic, self)

        # WRS: Write volatile flag file indicating service has started.
        if CONF.service_enabled_flag:
            volatile_dir = '/var/run/nova'
            if os.path.isdir(volatile_dir):
                flag = "{}/.nova_{}_enabled".format(volatile_dir, self.topic)
                try:
                    open(flag, 'w').close()
                    LOG.info('service %(topic)s ready',
                             {'topic': self.topic})
                except Exception as e:
                    LOG.error(
                        'Cannot create file: %(file)s, error=%(error)s',
                        {'file': flag, 'error': e})

        if self.periodic_enable:
            if self.periodic_fuzzy_delay:
                initial_delay = random.randint(0, self.periodic_fuzzy_delay)
            else:
                initial_delay = None

            # WRS - Do not delay start of non-compute services. This improves
            # responsiveness of no-reboot-patching of controllers by making
            # the nova-scheduler weigher patch audit run immediately.
            if 'compute' not in self.binary:
                initial_delay = None
            self.tg.add_dynamic_timer(
                self.periodic_tasks,
                initial_delay=initial_delay,
                periodic_interval_max=self.periodic_interval_max)