def _get_provisioning_type(extra_specs):
    from_config = False
    ptype = extra_specs.get(PROVISIONING_TYPE_KEY)
    if not ptype:
        ptype = extra_specs.get(LEGACY_PROVISIONING_TYPE_KEY)
        if ptype:
            LOG.warning(
                _LW("Deprecated '%(legacy_key)s' flavor key is used to "
                    "specify ScaleIO provisioning type. Please use "
                    "'%(key)s' instead."),
                {'legacy_key': LEGACY_PROVISIONING_TYPE_KEY,
                 'key': PROVISIONING_TYPE_KEY})
        else:
            ptype = CONF.scaleio.default_provisioning_type
            from_config = True
    if ptype in ['ThickProvisioned', 'ThinProvisioned']:
        opt_source = (_('config') if from_config else _('flavor'))
        value_to_use = {'ThickProvisioned': 'thick',
                        'ThinProvisioned': 'thin'}[ptype]
        LOG.warning(
            _LW("Deprecated provisioning type '%(legacy_type)s' is specified "
                "in %(source)s. Please change the value to '%(type)s', "
                "because it will not be supported in next Nova releases."),
            {'legacy_type': ptype, 'source': opt_source,
             'type': value_to_use})
    return ptype
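# Minimal usage sketch of the helper above, assuming the module constants
# PROVISIONING_TYPE_KEY and LEGACY_PROVISIONING_TYPE_KEY; 'thin' and
# 'ThinProvisioned' are values handled by the function itself.
example_extra_specs_new = {PROVISIONING_TYPE_KEY: 'thin'}
example_extra_specs_legacy = {LEGACY_PROVISIONING_TYPE_KEY: 'ThinProvisioned'}
# _get_provisioning_type(example_extra_specs_new)    -> 'thin'
# _get_provisioning_type(example_extra_specs_legacy) -> 'ThinProvisioned'
#   (and emits the deprecation warnings shown above)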
def get_info(self, instance):
    """Get the current state and resource usage for this instance.

    If the instance is not found this method returns (a dictionary
    with) NOSTATE and all resources == 0.

    :param instance: the instance object.
    :returns: an InstanceInfo object
    """
    try:
        node = _validate_instance_and_node(self.ironicclient, instance)
    except exception.InstanceNotFound:
        return hardware.InstanceInfo(
            state=map_power_state(ironic_states.NOSTATE))

    properties = self._parse_node_properties(node)
    memory_kib = properties['memory_mb'] * 1024
    if memory_kib == 0:
        LOG.warning(_LW("Warning, memory usage is 0 for "
                        "%(instance)s on baremetal node %(node)s."),
                    {'instance': instance.uuid,
                     'node': instance.node})

    num_cpu = properties['cpus']
    if num_cpu == 0:
        LOG.warning(_LW("Warning, number of cpus is 0 for "
                        "%(instance)s on baremetal node %(node)s."),
                    {'instance': instance.uuid,
                     'node': instance.node})

    return hardware.InstanceInfo(state=map_power_state(node.power_state),
                                 max_mem_kb=memory_kib,
                                 mem_kb=memory_kib,
                                 num_cpu=num_cpu)
def setup_host(self):
    """Setup VirtualBox to use the received VirtualBox Remote
    Desktop Extension if `remote_display` is enabled.
    """
    if not self.remote_display:
        LOG.debug("VRDE server is disabled.")
        return

    if self.vrde_module not in self._ext_packs:
        LOG.warning(
            i18n._LW("The `%(vrde_module)s` VRDE Module is not "
                     "available."),
            {"vrde_module": self.vrde_module})
        return

    try:
        self._vbox_manage.set_property(constants.VBOX_VRDE_EXTPACK,
                                       self.vrde_module)
    except vbox_exc.VBoxManageError as exc:
        LOG.warning(
            i18n._LW("Failed to set VRDE Module `%(vrde_module)s`: "
                     "%(reason)s"),
            {"vrde_module": self.vrde_module, "reason": exc})
        return False

    LOG.info(i18n._LI("The VRDE Module used is %(vrde_module)s"),
             {"vrde_module": self.vrde_module})
    return True
def get_constraint_matrix(self, hosts, filter_properties):
    num_hosts = len(hosts)
    num_instances = filter_properties.get('num_instances')

    constraint_matrix = [[True for j in xrange(num_instances)]
                         for i in xrange(num_hosts)]

    # get requested disk
    instance_type = filter_properties.get('instance_type') or {}
    requested_disk = (1024 * (instance_type.get('root_gb', 0) +
                              instance_type.get('ephemeral_gb', 0)) +
                      instance_type.get('swap', 0))
    for inst_type_key in ['root_gb', 'ephemeral_gb', 'swap']:
        if inst_type_key not in instance_type:
            LOG.warn(_LW("Disk information of requested instances\' %s "
                         "is incomplete, use 0 as the requested size."),
                     inst_type_key)
    if requested_disk <= 0:
        LOG.warn(_LW("ExactDiskConstraint is skipped because requested "
                     "instance disk size is 0 or invalid."))
        return constraint_matrix

    for i in xrange(num_hosts):
        if requested_disk == hosts[i].free_disk_mb:
            constraint_matrix[i] = (
                [True] + [False for j in xrange(num_instances - 1)])
        else:
            constraint_matrix[i] = [False for j in xrange(num_instances)]
            LOG.debug("%(host)s does not have exactly %(requested_disk)s "
                      "MB disk, it has %(usable_disk)s MB disk.",
                      {'host': hosts[i],
                       'requested_disk': requested_disk,
                       'usable_disk': hosts[i].free_disk_mb})

    return constraint_matrix
def handle_schedule_error(context, ex, instance_uuid, request_spec):
    """On run_instance failure, update instance state and send
    notifications.
    """

    if isinstance(ex, exception.NoValidHost):
        LOG.warning(_LW("NoValidHost exception with message: \'%s\'"),
                    ex.format_message().strip(),
                    instance_uuid=instance_uuid)
    else:
        LOG.exception(_LE("Exception during scheduler.run_instance"))
    state = vm_states.ERROR.upper()
    LOG.warning(_LW('Setting instance to %s state.'), state,
                instance_uuid=instance_uuid)

    (old_ref, new_ref) = db.instance_update_and_get_original(
        context, instance_uuid,
        {'vm_state': vm_states.ERROR, 'task_state': None})
    notifications.send_update(context, old_ref, new_ref,
                              service="scheduler")
    compute_utils.add_instance_fault_from_exc(
        context, new_ref, ex, sys.exc_info())

    properties = request_spec.get('instance_properties', {})
    payload = dict(request_spec=request_spec,
                   instance_properties=properties,
                   instance_id=instance_uuid,
                   state=vm_states.ERROR,
                   method='run_instance',
                   reason=ex)

    rpc.get_notifier('scheduler').error(context,
                                        'scheduler.run_instance', payload)
def get_constraint_matrix(self, hosts, filter_properties):
    num_hosts = len(hosts)
    num_instances = filter_properties.get('num_instances')

    constraint_matrix = [[True for j in xrange(num_instances)]
                         for i in xrange(num_hosts)]

    # get requested ram
    instance_type = filter_properties.get('instance_type') or {}
    requested_ram = instance_type.get('memory_mb', 0)
    if 'memory_mb' not in instance_type:
        LOG.warn(_LW("No information about requested instances\' RAM size "
                     "was found, default value (0) is used."))
    if requested_ram <= 0:
        LOG.warn(_LW("ExactRamConstraint is skipped because requested "
                     "instance RAM size is 0 or invalid."))
        return constraint_matrix

    for i in xrange(num_hosts):
        if requested_ram == hosts[i].free_ram_mb:
            constraint_matrix[i] = (
                [True] + [False for j in xrange(num_instances - 1)])
        else:
            constraint_matrix[i] = [False for j in xrange(num_instances)]
            LOG.debug("%(host)s does not have exactly %(requested_ram)s MB "
                      "RAM, it has %(usable_ram)s MB RAM.",
                      {'host': hosts[i],
                       'requested_ram': requested_ram,
                       'usable_ram': hosts[i].free_ram_mb})

    return constraint_matrix
def driver_detach(self, context, instance, volume_api, virt_driver):
    connection_info = self['connection_info']
    mp = self['mount_device']
    volume_id = self.volume_id

    LOG.info(_LI('Attempting to driver detach volume %(volume_id)s from '
                 'mountpoint %(mp)s'), {'volume_id': volume_id, 'mp': mp},
             instance=instance)
    try:
        if not virt_driver.instance_exists(instance):
            LOG.warning(_LW('Detaching volume from unknown instance'),
                        instance=instance)

        encryption = encryptors.get_encryption_metadata(context,
                                                        volume_api,
                                                        volume_id,
                                                        connection_info)

        virt_driver.detach_volume(connection_info, instance, mp,
                                  encryption=encryption)
    except exception.DiskNotFound as err:
        LOG.warning(_LW('Ignoring DiskNotFound exception while '
                        'detaching volume %(volume_id)s from '
                        '%(mp)s : %(err)s'),
                    {'volume_id': volume_id, 'mp': mp, 'err': err},
                    instance=instance)
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE('Failed to detach volume '
                              '%(volume_id)s from %(mp)s'),
                          {'volume_id': volume_id, 'mp': mp},
                          instance=instance)
            volume_api.roll_detaching(context, volume_id)
def teardown(self):
    LOG.debug("Tearing down appliance")

    try:
        try:
            if self.mount:
                self.handle.aug_close()
        except RuntimeError as e:
            LOG.warning(_LW("Failed to close augeas %s"), e)

        try:
            self.handle.shutdown()
        except AttributeError:
            # Older libguestfs versions don't have an explicit shutdown
            pass
        except RuntimeError as e:
            LOG.warning(_LW("Failed to shutdown appliance %s"), e)

        try:
            self.handle.close()
        except AttributeError:
            # Older libguestfs versions don't have an explicit close
            pass
        except RuntimeError as e:
            LOG.warning(_LW("Failed to close guest handle %s"), e)
    finally:
        # dereference object and implicitly close()
        self.handle = None
def _parse_node_properties(self, node):
    """Helper method to parse the node's properties."""
    properties = {}

    for prop in ('cpus', 'memory_mb', 'local_gb'):
        try:
            properties[prop] = int(node.properties.get(prop, 0))
        except (TypeError, ValueError):
            LOG.warning(_LW('Node %(uuid)s has a malformed "%(prop)s". '
                            'It should be an integer.'),
                        {'uuid': node.uuid, 'prop': prop})
            properties[prop] = 0

    raw_cpu_arch = node.properties.get('cpu_arch', None)
    try:
        cpu_arch = arch.canonicalize(raw_cpu_arch)
    except exception.InvalidArchitectureName:
        cpu_arch = None
    if not cpu_arch:
        LOG.warning(_LW("cpu_arch not defined for node '%s'"), node.uuid)

    properties['cpu_arch'] = cpu_arch
    properties['raw_cpu_arch'] = raw_cpu_arch
    properties['capabilities'] = node.properties.get('capabilities')
    return properties
def wrapper(self, *a, **k):
    try:
        return f(self, *a, **k)
    except ks_exc.EndpointNotFound:
        warn_limit(
            self,
            _LW('The placement API endpoint not found. Placement is '
                'optional in Newton, but required in Ocata. Please '
                'enable the placement service before upgrading.'))
    except ks_exc.MissingAuthPlugin:
        warn_limit(
            self,
            _LW('No authentication information found for placement '
                'API. Placement is optional in Newton, but required '
                'in Ocata. Please enable the placement service '
                'before upgrading.'))
    except ks_exc.Unauthorized:
        warn_limit(
            self,
            _LW('Placement service credentials do not work. '
                'Placement is optional in Newton, but required '
                'in Ocata. Please enable the placement service '
                'before upgrading.'))
    except ks_exc.ConnectFailure:
        msg = _LW('Placement API service is not responding.')
        LOG.warning(msg)
def run_recover_tasks(host, guest, instance, on_migration_failure):
    """Run any pending migration recovery tasks

    :param host: a nova.virt.libvirt.host.Host
    :param guest: a nova.virt.libvirt.guest.Guest
    :param instance: a nova.objects.Instance
    :param on_migration_failure: queue of recovery tasks

    Run any recovery tasks provided in the on_migration_failure queue.

    Currently the only valid task that can be requested is "unpause".
    Other tasks will be ignored.
    """

    while on_migration_failure:
        task = on_migration_failure.popleft()
        # NOTE(tdurakov): there is still a possibility to leave the
        # instance paused in case of live-migration failure.
        # This check guarantees that the instance will be resumed
        # in this case.
        if task == 'unpause':
            try:
                state = guest.get_power_state(host)
                if state == power_state.PAUSED:
                    guest.resume()
            except Exception as e:
                LOG.warning(_LW("Failed to resume paused instance "
                                "before live-migration rollback %s"),
                            e, instance=instance)
        else:
            LOG.warning(_LW("Unknown migration task '%(task)s'"),
                        {"task": task}, instance=instance)
def should_abort(instance, now,
                 progress_time, progress_timeout,
                 elapsed, completion_timeout,
                 migration_status):
    """Determine if the migration should be aborted

    :param instance: a nova.objects.Instance
    :param now: current time in secs since epoch
    :param progress_time: when progress was last made in secs since epoch
    :param progress_timeout: time in secs to allow for progress
    :param elapsed: total elapsed time of migration in secs
    :param completion_timeout: time in secs to allow for completion
    :param migration_status: current status of the migration

    Check the progress and completion timeouts to determine if either
    of them has been hit, and should thus cause the migration to be
    aborted.

    Avoid aborting the migration if it is running in post-copy mode.

    :returns: True if migration should be aborted, False otherwise
    """
    if migration_status == "running (post-copy)":
        return False

    if progress_timeout != 0 and \
            (now - progress_time) > progress_timeout:
        LOG.warning(_LW("Live migration stuck for %d sec"),
                    (now - progress_time), instance=instance)
        return True

    if completion_timeout != 0 and elapsed > completion_timeout:
        LOG.warning(_LW("Live migration not completed after %d sec"),
                    completion_timeout, instance=instance)
        return True

    return False
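# Worked example of the two abort conditions above, using made-up numbers
# (a standalone sketch; it restates the checks rather than importing them).
now, progress_time, progress_timeout = 1000.0, 800.0, 150
elapsed, completion_timeout = 400.0, 800
stalled = progress_timeout != 0 and (now - progress_time) > progress_timeout
overdue = completion_timeout != 0 and elapsed > completion_timeout
print(stalled, overdue)  # True, False -> the migration would be aborted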
def remove_snap(self, volume, name, ignore_errors=False, pool=None,
                force=False):
    """Removes a snapshot from an RBD volume.

    :param volume: Name of RBD object
    :param name: Name of snapshot
    :param ignore_errors: whether or not to log warnings on failures
    :param pool: Name of pool
    :param force: Remove snapshot even if it is protected
    """
    with RBDVolumeProxy(self, str(volume), pool=pool) as vol:
        if name in [snap.get('name', '') for snap in vol.list_snaps()]:
            if vol.is_protected_snap(name):
                if force:
                    tpool.execute(vol.unprotect_snap, name)
                elif not ignore_errors:
                    LOG.warning(_LW('snapshot(%(name)s) on rbd '
                                    'image(%(img)s) is protected, '
                                    'skipping'),
                                {'name': name, 'img': volume})
                    return
            LOG.debug('removing snapshot(%(name)s) on rbd image(%(img)s)',
                      {'name': name, 'img': volume})
            tpool.execute(vol.remove_snap, name)
        elif not ignore_errors:
            LOG.warning(_LW('no snapshot(%(name)s) found on rbd '
                            'image(%(img)s)'),
                        {'name': name, 'img': volume})
def power_off(self, timeout=0, retry_interval=10):
    """Power off z/VM instance."""
    try:
        self._power_state("PUT", "softoff")
    except exception.ZVMXCATInternalError as err:
        err_str = err.format_message()
        if ("Return Code: 200" in err_str and
                "Reason Code: 12" in err_str):
            # Instance already not active
            LOG.warn(_LW("z/VM instance %s not active") % self._name)
            return
        else:
            msg = _("Failed to power off instance: %s") % err_str
            LOG.error(msg)
            raise nova_exception.InstancePowerOffFailure(reason=msg)

    timeout = timeout or CONF.shutdown_timeout
    retry_interval = retry_interval or 10
    retry_count = timeout // retry_interval
    while (retry_count > 0):
        if self._get_power_stat() == power_state.SHUTDOWN:
            # In shutdown state already
            return
        else:
            time.sleep(retry_interval)
            retry_count -= 1

    LOG.warn(_LW("Failed to shutdown instance %(inst)s in %(time)d "
                 "seconds") % {'inst': self._name, 'time': timeout})
def _check_prerequisites(self):
    """Sanity checks before attempting to mount SOFS."""

    # config is mandatory
    config = CONF.libvirt.scality_sofs_config
    if not config:
        msg = _LW("Value required for 'scality_sofs_config'")
        LOG.warn(msg)
        raise exception.NovaException(msg)

    # config can be a file path or a URL, check it
    if urlparse.urlparse(config).scheme == '':
        # turn local path into URL
        config = 'file://%s' % config
    try:
        urllib2.urlopen(config, timeout=5).close()
    except urllib2.URLError as e:
        msg = _LW("Cannot access 'scality_sofs_config': %s") % e
        LOG.warn(msg)
        raise exception.NovaException(msg)

    # mount.sofs must be installed
    if not os.access('/sbin/mount.sofs', os.X_OK):
        msg = _LW("Cannot execute /sbin/mount.sofs")
        LOG.warn(msg)
        raise exception.NovaException(msg)
def _parse_node_properties(self, node):
    """Helper method to parse the node's properties."""
    properties = {}

    for prop in ("cpus", "memory_mb", "local_gb"):
        try:
            properties[prop] = int(node.properties.get(prop, 0))
        except (TypeError, ValueError):
            LOG.warning(
                _LW('Node %(uuid)s has a malformed "%(prop)s". '
                    "It should be an integer."),
                {"uuid": node.uuid, "prop": prop},
            )
            properties[prop] = 0

    raw_cpu_arch = node.properties.get("cpu_arch", None)
    try:
        cpu_arch = arch.canonicalize(raw_cpu_arch)
    except exception.InvalidArchitectureName:
        cpu_arch = None
    if not cpu_arch:
        LOG.warning(_LW("cpu_arch not defined for node '%s'"), node.uuid)

    properties["cpu_arch"] = cpu_arch
    properties["raw_cpu_arch"] = raw_cpu_arch
    properties["capabilities"] = node.properties.get("capabilities")
    return properties
def get(self):
    j = {}

    for target in CONF.api.vendordata_dynamic_targets:
        # NOTE(mikal): a target is composed of the following:
        #    name@url
        # where name is the name to use in the metadata handed to
        # instances, and url is the URL to fetch it from
        if target.find('@') == -1:
            LOG.warning(_LW('Vendordata target %(target)s lacks a name. '
                            'Skipping'),
                        {'target': target}, instance=self.instance)
            continue

        tokens = target.split('@')
        name = tokens[0]
        url = '@'.join(tokens[1:])

        if name in j:
            LOG.warning(_LW('Vendordata already contains an entry named '
                            '%(target)s. Skipping'),
                        {'target': target}, instance=self.instance)
            continue

        j[name] = self._do_request(name, url)

    return j
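# Standalone sketch of the name@url split used above. The target value is a
# made-up example; only the parsing is shown, no HTTP request is made.
target = 'vendor1@http://127.0.0.1:9000/metadata'
tokens = target.split('@')
name, url = tokens[0], '@'.join(tokens[1:])
print(name, url)  # vendor1 http://127.0.0.1:9000/metadata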
def _create_port(self, client, tenant_id, network_id, port_req_body,
                 fixed_ip=None, security_group_ids=None, dhcp_opts=None,
                 mac_address=None):
    # NOTE: mac_address is an assumed parameter here; the
    # MacAddressInUseClient handler below references it, so it must be
    # supplied by the caller.
    try:
        if fixed_ip:
            port_req_body['port']['fixed_ips'] = [{'ip_address': fixed_ip}]
        port_req_body['port']['network_id'] = network_id
        port_req_body['port']['admin_state_up'] = True
        port_req_body['port']['tenant_id'] = tenant_id
        if security_group_ids:
            port_req_body['port']['security_groups'] = security_group_ids
        if dhcp_opts is not None:
            port_req_body['port']['extra_dhcp_opts'] = dhcp_opts
        port_id = client.create_port(port_req_body)['port']['id']
        LOG.debug('Successfully created port: %s', port_id)
        return port_id
    except neutron_client_exc.OverQuotaClient:
        LOG.warning(_LW(
            'Neutron error: Port quota exceeded in tenant: %s'),
            port_req_body['port']['tenant_id'])
        raise exception.PortLimitExceeded()
    except neutron_client_exc.IpAddressGenerationFailureClient:
        LOG.warning(_LW('Neutron error: No more fixed IPs in network: %s'),
                    network_id)
        raise exception.NoMoreFixedIps()
    except neutron_client_exc.MacAddressInUseClient:
        LOG.warning(_LW('Neutron error: MAC address %(mac)s is already '
                        'in use on network %(network)s.') %
                    {'mac': mac_address, 'network': network_id})
        raise exception.PortInUse(port_id=mac_address)
    except neutron_client_exc.NeutronClientException:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE('Neutron error creating port on network %s'),
                          network_id)
def wrapper(self, *a, **k):
    try:
        # We've failed in a non recoverable way, fully give up.
        if self._disabled:
            return
        return f(self, *a, **k)
    except ks_exc.EndpointNotFound:
        msg = _LW("The placement API endpoint not found. Optional use of "
                  "placement API for reporting is now disabled.")
        LOG.warning(msg)
        self._disabled = True
    except ks_exc.MissingAuthPlugin:
        msg = _LW("No authentication information found for placement API. "
                  "Optional use of placement API for reporting is now "
                  "disabled.")
        LOG.warning(msg)
        self._disabled = True
    except ks_exc.Unauthorized:
        msg = _LW('Placement service credentials do not work. Optional '
                  'use of placement API for reporting is now disabled.')
        LOG.warning(msg)
        self._disabled = True
    except ks_exc.ConnectFailure:
        msg = _LW('Placement API service is not responding.')
        LOG.warning(msg)
def wrapper(self, *a, **k):
    try:
        return f(self, *a, **k)
    except ks_exc.EndpointNotFound:
        warn_limit(
            self,
            _LW('The placement API endpoint not found. Placement is '
                'optional in Newton, but required in Ocata. Please '
                'enable the placement service before upgrading.'))
    except ks_exc.MissingAuthPlugin:
        warn_limit(
            self,
            _LW('No authentication information found for placement '
                'API. Placement is optional in Newton, but required '
                'in Ocata. Please enable the placement service '
                'before upgrading.'))
    except ks_exc.Unauthorized:
        warn_limit(
            self,
            _LW('Placement service credentials do not work. '
                'Placement is optional in Newton, but required '
                'in Ocata. Please enable the placement service '
                'before upgrading.'))
    except ks_exc.DiscoveryFailure:
        # TODO(_gryf): Looks like DiscoveryFailure is not the only missing
        # exception here. In Pike we should take care about keystoneauth1
        # failures handling globally.
        warn_limit(self,
                   _LW('Discovering suitable URL for placement API '
                       'failed.'))
    except ks_exc.ConnectFailure:
        msg = _LW('Placement API service is not responding.')
        LOG.warning(msg)
def authorize_console(self, context, token, console_type, host, port,
                      internal_access_path, instance_uuid):

    token_dict = {'token': token,
                  'instance_uuid': instance_uuid,
                  'console_type': console_type,
                  'host': host,
                  'port': port,
                  'internal_access_path': internal_access_path,
                  'last_activity_at': time.time()}
    data = jsonutils.dumps(token_dict)

    # We need to log the warning message if the token is not cached
    # successfully, because the failure will cause the console for
    # instance to not be usable.
    if not self.mc.set(token.encode('UTF-8'),
                       data, CONF.console_token_ttl):
        LOG.warning(_LW("Token: %(token)s failed to save into memcached."),
                    {'token': token})
    tokens = self._get_tokens_for_instance(instance_uuid)

    # Remove the expired tokens from cache.
    for tok in tokens:
        token_str = self.mc.get(tok.encode('UTF-8'))
        if not token_str:
            tokens.remove(tok)
    tokens.append(token)
    if not self.mc.set(instance_uuid.encode('UTF-8'),
                       jsonutils.dumps(tokens)):
        LOG.warning(_LW("Instance: %(instance_uuid)s failed to save "
                        "into memcached"),
                    {'instance_uuid': instance_uuid})

    LOG.audit(_("Received Token: %(token)s, %(token_dict)s"),
              {'token': token, 'token_dict': token_dict})
def set_vm_state_and_notify(context, instance_uuid, service, method, updates,
                            ex, request_spec):
    """changes VM state and notifies."""
    LOG.warning(_LW("Failed to %(service)s_%(method)s: %(ex)s"),
                {'service': service, 'method': method, 'ex': ex})

    vm_state = updates['vm_state']
    properties = request_spec.get('instance_properties', {})
    # NOTE(vish): We shouldn't get here unless we have a catastrophic
    #             failure, so just set the instance to its internal state
    notifier = rpc.get_notifier(service)
    state = vm_state.upper()
    LOG.warning(_LW('Setting instance to %s state.'), state,
                instance_uuid=instance_uuid)

    instance = objects.Instance(context=context, uuid=instance_uuid,
                                **updates)
    instance.obj_reset_changes(['uuid'])
    instance.save()
    compute_utils.add_instance_fault_from_exc(
        context, instance, ex, sys.exc_info())

    payload = dict(request_spec=request_spec,
                   instance_properties=properties,
                   instance_id=instance_uuid,
                   state=vm_state,
                   method=method,
                   reason=ex)

    event_type = '%s.%s' % (service, method)
    notifier.error(context, event_type, payload)
def detach(self, context, volume_id, instance_uuid=None,
           attachment_id=None):
    if attachment_id is None:
        volume = self.get(context, volume_id)
        if volume['multiattach']:
            attachments = volume.get('attachments', {})
            if instance_uuid:
                attachment_id = attachments.get(instance_uuid, {}).\
                    get('attachment_id')
                if not attachment_id:
                    LOG.warning(_LW("attachment_id couldn't be retrieved "
                                    "for volume %(volume_id)s with "
                                    "instance_uuid %(instance_id)s. The "
                                    "volume has the 'multiattach' flag "
                                    "enabled, without the attachment_id "
                                    "Cinder most probably cannot perform "
                                    "the detach."),
                                {'volume_id': volume_id,
                                 'instance_id': instance_uuid})
            else:
                LOG.warning(_LW("attachment_id couldn't be retrieved for "
                                "volume %(volume_id)s. The volume has the "
                                "'multiattach' flag enabled, without the "
                                "attachment_id Cinder most probably "
                                "cannot perform the detach."),
                            {'volume_id': volume_id})

    cinderclient(context).volumes.detach(volume_id, attachment_id)
def _update_usage_from_migrations(self, context, migrations):

    filtered = {}
    instances = {}
    self.tracked_migrations.clear()

    # do some defensive filtering against bad migrations records in the
    # database:
    for migration in migrations:
        uuid = migration.instance_uuid

        try:
            if uuid not in instances:
                instances[uuid] = migration.instance
        except exception.InstanceNotFound as e:
            # migration referencing deleted instance
            LOG.debug("Migration instance not found: %s", e)
            continue

        # skip migration if instance isn't in a resize state:
        if not _instance_in_resize_state(instances[uuid]):
            LOG.warning(_LW("Instance not resizing, skipping migration."),
                        instance_uuid=uuid)
            continue

        # filter to most recently updated migration for each instance:
        other_migration = filtered.get(uuid, None)
        if (not other_migration or
                migration.updated_at >= other_migration.updated_at):
            filtered[uuid] = migration

    for migration in filtered.values():
        instance = instances[migration.instance_uuid]
        try:
            self._update_usage_from_migration(context, instance, None,
                                              migration)
        except exception.FlavorNotFound:
            LOG.warning(_LW("Flavor could not be found, skipping "
                            "migration."),
                        instance_uuid=migration.instance_uuid)
            continue
def instance_claim(self, context, instance_ref, limits=None):
    """Indicate that some resources are needed for an upcoming compute
    instance build operation.

    This should be called before the compute node is about to perform
    an instance build operation that will consume additional resources.

    :param context: security context
    :param instance_ref: instance to reserve resources for.
    :type instance_ref: nova.objects.instance.Instance object
    :param limits: Dict of oversubscription limits for memory, disk,
                   and CPUs.
    :returns: A Claim ticket representing the reserved resources. It can
              be used to revert the resource usage if an error occurs
              during the instance build.
    """
    if self.disabled:
        # compute_driver doesn't support resource tracking, just
        # set the 'host' and node fields and continue the build:
        self._set_instance_host_and_node(context, instance_ref)
        return claims.NopClaim()

    # sanity checks:
    if instance_ref.host:
        LOG.warning(_LW("Host field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance_ref)

    if instance_ref.node:
        LOG.warning(_LW("Node field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance_ref)

    # get memory overhead required to build this instance:
    overhead = self.driver.estimate_instance_overhead(instance_ref)
    LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB", {"flavor": instance_ref.memory_mb,
                     "overhead": overhead["memory_mb"]})

    claim = claims.Claim(context, instance_ref, self, self.compute_node,
                         overhead=overhead, limits=limits)

    # self._set_instance_host_and_node() will save instance_ref to the DB
    # so set instance_ref['numa_topology'] first. We need to make sure
    # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE
    # so that the resource audit knows about any cpus we've pinned.
    instance_ref.numa_topology = claim.claimed_numa_topology
    self._set_instance_host_and_node(context, instance_ref)

    # Mark resources in-use and update stats
    self._update_usage_from_instance(context, instance_ref)

    elevated = context.elevated()
    # persist changes to the compute node:
    self._update(elevated)

    return claim
def discon_vol_for_vio(vios_w):
    """Removes the volume from a specific Virtual I/O Server.

    :param vios_w: The VIOS wrapper.
    :return: True if a remove action was done against this VIOS. False
             otherwise.
    """
    LOG.debug("Disconnect volume %(vol)s from vios uuid %(uuid)s",
              dict(vol=self.volume_id, uuid=vios_w.uuid))
    udid, device_name = None, None
    try:
        udid = self._get_udid()
        if not udid:
            # We lost our bdm data. We'll need to discover it.
            status, device_name, udid = self._discover_volume_on_vios(
                vios_w, self.volume_id)

        if udid and not device_name:
            device_name = vios_w.hdisk_from_uuid(udid)

        if not device_name:
            LOG.warn(_LW(
                "Disconnect Volume: No mapped device found on Virtual "
                "I/O Server %(vios)s for volume %(volume_id)s. "
                "Volume UDID: %(volume_uid)s"),
                {'volume_uid': udid, 'volume_id': self.volume_id,
                 'vios': vios_w.name})
            return False

    except Exception as e:
        LOG.warn(_LW(
            "Disconnect Volume: Failed to find disk on Virtual I/O "
            "Server %(vios_name)s for volume %(volume_id)s. Volume "
            "UDID: %(volume_uid)s. Error: %(error)s"),
            {'error': e, 'volume_uid': udid, 'vios_name': vios_w.name,
             'volume_id': self.volume_id})
        return False

    # We have found the device name
    LOG.info(_LI("Disconnect Volume: Discovered the device %(hdisk)s "
                 "on Virtual I/O Server %(vios_name)s for volume "
                 "%(volume_id)s. Volume UDID: %(volume_uid)s."),
             {'volume_uid': udid, 'volume_id': self.volume_id,
              'vios_name': vios_w.name, 'hdisk': device_name})

    # Add the action to remove the mapping when the stg_ftsk is run.
    partition_id = vm.get_vm_id(self.adapter, self.vm_uuid)

    with lockutils.lock(hash(self)):
        self._add_remove_mapping(partition_id, vios_w.uuid, device_name)

        # Add a step after the mapping removal to also remove the
        # hdisk.
        self._add_remove_hdisk(vios_w, device_name)

    # Found a valid element to remove
    return True
def unplug(self, instance, vif, vm_ref):
    """unplug vif:
    1. unplug and destroy vif.
    2. delete the patch port pair between the integration bridge and
       the interim network.
    3. destroy the interim network
    4. delete the OVS bridge service for the interim network
    """
    super(XenAPIOpenVswitchDriver, self).unplug(instance, vif, vm_ref)

    net_name = self.get_vif_interim_net_name(vif)
    network = network_utils.find_network_with_name_label(
        self._session, net_name)
    if network is None:
        return
    vifs = self._session.network.get_VIFs(network)
    if vifs:
        # only remove the interim network when it's empty.
        # for resize/migrate on local host, vifs on both of the
        # source and target VM will be connected to the same
        # interim network.
        return
    LOG.debug('destroying patch port pair for vif: vif_id=%(vif_id)s',
              {'vif_id': vif['id']})
    bridge_name = self._session.network.get_bridge(network)
    patch_port1, patch_port2 = self._get_patch_port_pair_names(vif['id'])
    try:
        # delete the patch port pair
        self._ovs_del_port(bridge_name, patch_port1)
        self._ovs_del_port(CONF.xenserver.ovs_integration_bridge,
                           patch_port2)
    except Exception as e:
        LOG.warn(_LW("Failed to delete patch port pair for vif %(if)s,"
                     " exception:%(exception)s"),
                 {'if': vif, 'exception': e}, instance=instance)
        raise exception.VirtualInterfaceUnplugException(
            reason=_("Failed to delete patch port pair"))

    LOG.debug('destroying network: network=%(network)s,'
              'bridge=%(br)s',
              {'network': network, 'br': bridge_name})
    try:
        self._session.network.destroy(network)
        # delete the bridge if it still exists.
        # Because a patch port still exists on this bridge when the VM vif
        # is destroyed (which happens when the VM is shut down), the bridge
        # won't be destroyed automatically by XAPI, so destroy it here.
        self._ovs_del_br(bridge_name)
    except Exception as e:
        LOG.warn(_LW("Failed to delete bridge for vif %(if)s, "
                     "exception:%(exception)s"),
                 {'if': vif, 'exception': e}, instance=instance)
        raise exception.VirtualInterfaceUnplugException(
            reason=_("Failed to delete bridge"))
def test_get_all_host_states(self):
    context = 'fake_context'

    self.mox.StubOutWithMock(db, 'compute_node_get_all')
    self.mox.StubOutWithMock(host_manager.LOG, 'warning')
    db.compute_node_get_all(context).AndReturn(fakes.COMPUTE_NODES)
    # node 3 host physical disk space is greater than database
    host_manager.LOG.warning(_LW("Host %(hostname)s has more disk space "
                                 "than database expected (%(physical)sgb >"
                                 " %(database)sgb)"),
                             {'physical': 3333, 'database': 3072,
                              'hostname': 'node3'})
    # Invalid service
    host_manager.LOG.warning(_LW("No service for compute ID %s"), 5)
    self.mox.ReplayAll()

    self.host_manager.get_all_host_states(context)
    host_states_map = self.host_manager.host_state_map

    self.assertEqual(len(host_states_map), 4)
    # Check that .service is set properly
    for i in xrange(4):
        compute_node = fakes.COMPUTE_NODES[i]
        host = compute_node['service']['host']
        node = compute_node['hypervisor_hostname']
        state_key = (host, node)
        self.assertEqual(host_states_map[state_key].service,
                         compute_node['service'])
    self.assertEqual(host_states_map[('host1', 'node1')].free_ram_mb,
                     512)
    # 511GB
    self.assertEqual(host_states_map[('host1', 'node1')].free_disk_mb,
                     524288)
    self.assertEqual(host_states_map[('host2', 'node2')].free_ram_mb,
                     1024)
    # 1023GB
    self.assertEqual(host_states_map[('host2', 'node2')].free_disk_mb,
                     1048576)
    self.assertEqual(host_states_map[('host3', 'node3')].free_ram_mb,
                     3072)
    # 3071GB
    self.assertEqual(host_states_map[('host3', 'node3')].free_disk_mb,
                     3145728)
    self.assertThat(
        objects.NUMATopology.obj_from_db_obj(
            host_states_map[('host3', 'node3')].numa_topology
        )._to_dict(),
        matchers.DictMatches(fakes.NUMA_TOPOLOGY._to_dict()))
    self.assertEqual(host_states_map[('host4', 'node4')].free_ram_mb,
                     8192)
    # 8191GB
    self.assertEqual(host_states_map[('host4', 'node4')].free_disk_mb,
                     8388608)
def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
                     injected_files, new_pass, orig_sys_metadata,
                     bdms, recreate, on_shared_storage,
                     preserve_ephemeral=False, host=None):

    with compute_utils.EventReporter(context, 'rebuild_server',
                                     instance.uuid):
        if not host:
            # NOTE(lcostantino): Retrieve scheduler filters for the
            # instance when the feature is available
            filter_properties = {'ignore_hosts': [instance.host]}
            request_spec = scheduler_utils.build_request_spec(
                context, image_ref, [instance])
            try:
                scheduler_utils.setup_instance_group(context, request_spec,
                                                     filter_properties)
                hosts = self.scheduler_client.select_destinations(
                    context, request_spec, filter_properties)
                host = hosts.pop(0)['host']
            except exception.NoValidHost as ex:
                with excutils.save_and_reraise_exception():
                    self._set_vm_state_and_notify(
                        context, instance.uuid, 'rebuild_server',
                        {'vm_state': instance.vm_state,
                         'task_state': None}, ex, request_spec)
                    LOG.warning(_LW("No valid host found for rebuild"),
                                instance=instance)
            except exception.UnsupportedPolicyException as ex:
                with excutils.save_and_reraise_exception():
                    self._set_vm_state_and_notify(
                        context, instance.uuid, 'rebuild_server',
                        {'vm_state': instance.vm_state,
                         'task_state': None}, ex, request_spec)
                    LOG.warning(_LW("Server with unsupported policy "
                                    "cannot be rebuilt"),
                                instance=instance)

        compute_utils.notify_about_instance_usage(
            self.notifier, context, instance, "rebuild.scheduled")

        self.compute_rpcapi.rebuild_instance(
            context,
            instance=instance,
            new_pass=new_pass,
            injected_files=injected_files,
            image_ref=image_ref,
            orig_image_ref=orig_image_ref,
            orig_sys_metadata=orig_sys_metadata,
            bdms=bdms,
            recreate=recreate,
            on_shared_storage=on_shared_storage,
            preserve_ephemeral=preserve_ephemeral,
            host=host)
def instance_claim(self, context, instance_ref, limits=None):
    """Indicate that some resources are needed for an upcoming compute
    instance build operation.

    This should be called before the compute node is about to perform
    an instance build operation that will consume additional resources.

    :param context: security context
    :param instance_ref: instance to reserve resources for
    :param limits: Dict of oversubscription limits for memory, disk,
                   and CPUs.
    :returns: A Claim ticket representing the reserved resources. It can
              be used to revert the resource usage if an error occurs
              during the instance build.
    """
    if self.disabled:
        # compute_driver doesn't support resource tracking, just
        # set the 'host' and node fields and continue the build:
        self._set_instance_host_and_node(context, instance_ref)
        return claims.NopClaim()

    # sanity checks:
    if instance_ref['host']:
        LOG.warning(_LW("Host field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance_ref)

    if instance_ref['node']:
        LOG.warning(_LW("Node field should not be set on the instance "
                        "until resources have been claimed."),
                    instance=instance_ref)

    # get memory overhead required to build this instance:
    overhead = self.driver.estimate_instance_overhead(instance_ref)
    LOG.debug("Memory overhead for %(flavor)d MB instance; %(overhead)d "
              "MB", {'flavor': instance_ref['memory_mb'],
                     'overhead': overhead['memory_mb']})

    claim = claims.Claim(context, instance_ref, self, self.compute_node,
                         overhead=overhead, limits=limits)

    self._set_instance_host_and_node(context, instance_ref)
    instance_ref['numa_topology'] = claim.claimed_numa_topology

    # Mark resources in-use and update stats
    self._update_usage_from_instance(context, self.compute_node,
                                     instance_ref)

    elevated = context.elevated()
    # persist changes to the compute node:
    self._update(elevated, self.compute_node)

    return claim
def __init__(self):
    LOG.warning(_LW('This key manager is not suitable for use in '
                    'production deployments'))

    self.keys = {}
def unshelve_instance(self, context, instance):
    sys_meta = instance.system_metadata

    def safe_image_show(ctx, image_id):
        if image_id:
            return self.image_api.get(ctx, image_id, show_deleted=False)
        else:
            raise exception.ImageNotFound(image_id='')

    if instance.vm_state == vm_states.SHELVED:
        instance.task_state = task_states.POWERING_ON
        instance.save(expected_task_state=task_states.UNSHELVING)
        self.compute_rpcapi.start_instance(context, instance)
    elif instance.vm_state == vm_states.SHELVED_OFFLOADED:
        image = None
        image_id = sys_meta.get('shelved_image_id')
        # No need to check for image if image_id is None as
        # "shelved_image_id" key is not set for volume backed
        # instance during the shelve process
        if image_id:
            with compute_utils.EventReporter(
                    context, 'get_image_info', instance.uuid):
                try:
                    image = safe_image_show(context, image_id)
                except exception.ImageNotFound:
                    instance.vm_state = vm_states.ERROR
                    instance.save()

                    reason = _('Unshelve attempted but the image %s '
                               'cannot be found.') % image_id
                    LOG.error(reason, instance=instance)
                    raise exception.UnshelveException(
                        instance_id=instance.uuid, reason=reason)

        try:
            with compute_utils.EventReporter(context, 'schedule_instances',
                                             instance.uuid):
                filter_properties = {}
                scheduler_utils.populate_retry(filter_properties,
                                               instance.uuid)
                hosts = self._schedule_instances(context, image,
                                                 filter_properties,
                                                 instance)
                host_state = hosts[0]
                scheduler_utils.populate_filter_properties(
                    filter_properties, host_state)
                (host, node) = (host_state['host'],
                                host_state['nodename'])
                self.compute_rpcapi.unshelve_instance(
                    context, instance, host, image=image,
                    filter_properties=filter_properties, node=node)
        except (exception.NoValidHost,
                exception.UnsupportedPolicyException):
            instance.task_state = None
            instance.save()
            LOG.warning(_LW("No valid host found for unshelve instance"),
                        instance=instance)
            return
        except Exception:
            with excutils.save_and_reraise_exception():
                instance.task_state = None
                instance.save()
                LOG.error(_LE("Unshelve attempted but an error "
                              "has occurred"), instance=instance)
    else:
        LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or '
                      'SHELVED_OFFLOADED'), instance=instance)
        instance.vm_state = vm_states.ERROR
        instance.save()
        return
def _consolidate_vmdk_volume(self, instance, vm_ref, device, volume_ref,
                             adapter_type=None, disk_type=None):
    """Consolidate volume backing VMDK files if needed.

    The volume's VMDK file attached to an instance can be moved by SDRS
    if enabled on the cluster.
    By this the VMDK files can get copied onto another datastore and the
    copy on this new location will be the latest version of the VMDK
    file. So at the time of detach, we need to consolidate the current
    backing VMDK file with the VMDK file in the new location.

    We need to ensure that the VMDK chain (snapshots) remains intact
    during the consolidation. SDRS retains the chain when it copies VMDK
    files over, so for consolidation we relocate the backing with move
    option as moveAllDiskBackingsAndAllowSharing and then delete the
    older version of the VMDK file attaching the new version VMDK file.

    In the case of a volume boot, we need to ensure that the volume is
    on the datastore of the instance.
    """
    original_device = self._get_vmdk_base_volume_device(volume_ref)

    original_device_path = original_device.backing.fileName
    current_device_path = device.backing.fileName

    if original_device_path == current_device_path:
        # The volume is not moved from its original location.
        # No consolidation is required.
        LOG.debug("The volume has not been displaced from "
                  "its original location: %s. No consolidation "
                  "needed.", current_device_path)
        return

    # The volume has been moved from its original location.
    # Need to consolidate the VMDK files.
    LOG.info(_LI("The volume's backing has been relocated to %s. Need to "
                 "consolidate backing disk file."), current_device_path)

    # Pick the host and resource pool on which the instance resides.
    # Move the volume to the datastore where the new VMDK file is present.
    host = self._get_host_of_vm(vm_ref)
    res_pool = self._get_res_pool_of_host(host)
    datastore = device.backing.datastore
    detached = False
    LOG.debug("Relocating volume's backing: %(backing)s to resource "
              "pool: %(rp)s, datastore: %(ds)s, host: %(host)s.",
              {'backing': volume_ref, 'rp': res_pool, 'ds': datastore,
               'host': host})
    try:
        self._relocate_vmdk_volume(volume_ref, res_pool, datastore, host)
    except oslo_vmw_exceptions.FileNotFoundException:
        # Volume's vmdk was moved; remove the device so that we can
        # relocate the volume.
        LOG.warn(_LW("Virtual disk: %s of volume's backing not found."),
                 original_device_path, exc_info=True)
        LOG.debug("Removing disk device of volume's backing and "
                  "reattempting relocate.")
        self.detach_disk_from_vm(volume_ref, instance, original_device)
        detached = True
        self._relocate_vmdk_volume(volume_ref, res_pool, datastore, host)

    # Volume's backing is relocated now; detach the old vmdk if not done
    # already.
    if not detached:
        self.detach_disk_from_vm(volume_ref, instance, original_device,
                                 destroy_disk=True)

    # Attach the current volume to the volume_ref
    self.attach_disk_to_vm(volume_ref, instance, adapter_type, disk_type,
                           vmdk_path=current_device_path)
def find_multipath_device(device):
    """Try and discover the multipath device for a volume."""
    mdev = None
    devices = []
    out = None
    try:
        (out, err) = utils.execute('multipath', '-l', device,
                                   run_as_root=True)
    except processutils.ProcessExecutionError as exc:
        LOG.warning(_LW("Multipath call failed exit (%(code)s)"),
                    {'code': exc.exit_code})
        return None

    if out:
        lines = out.strip()
        lines = lines.split("\n")
        if lines:
            line = lines[0]
            info = line.split(" ")
            # device line output is different depending
            # on /etc/multipath.conf settings.
            if info[1][:2] == "dm":
                mdev_id = info[0]
                mdev = '/dev/mapper/%s' % mdev_id
            elif info[2][:2] == "dm":
                mdev_id = info[1].replace('(', '')
                mdev_id = mdev_id.replace(')', '')
                mdev = '/dev/mapper/%s' % mdev_id

            if mdev is None:
                LOG.warning(_LW("Couldn't find multipath device %s"), line)
                return None

            LOG.debug("Found multipath device = %s", mdev)
            device_lines = lines[3:]
            for dev_line in device_lines:
                if dev_line.find("policy") != -1:
                    continue
                if '#' in dev_line:
                    LOG.warning(_LW('Skip faulty line "%(dev_line)s" of'
                                    ' multipath device %(mdev)s'),
                                {'mdev': mdev, 'dev_line': dev_line})
                    continue

                dev_line = dev_line.lstrip(' |-`')
                dev_info = dev_line.split()
                address = dev_info[0].split(":")

                dev = {'device': '/dev/%s' % dev_info[1],
                       'host': address[0],
                       'channel': address[1],
                       'id': address[2],
                       'lun': address[3]}
                devices.append(dev)

    if mdev is not None:
        info = {"device": mdev,
                "id": mdev_id,
                "devices": devices}
        return info
    return None
def set_hvdevs(self, devices):
    """Sync the pci device tracker with hypervisor information.

    To support pci device hot plug, we sync with the hypervisor
    periodically, fetching all devices information from hypervisor,
    update the tracker and sync the DB information.

    Devices should not be hot-plugged when assigned to a guest,
    but possibly the hypervisor has no such guarantee. The best
    we can do is to give a warning if a device is changed
    or removed while assigned.
    """
    exist_addrs = set([dev['address'] for dev in self.pci_devs])
    new_addrs = set([dev['address'] for dev in devices])

    for existed in self.pci_devs:
        if existed['address'] in exist_addrs - new_addrs:
            try:
                device.remove(existed)
            except exception.PciDeviceInvalidStatus as e:
                LOG.warning(_LW("Trying to remove device with %(status)s "
                                "ownership %(instance_uuid)s because of "
                                "%(pci_exception)s"),
                            {'status': existed.status,
                             'instance_uuid': existed.instance_uuid,
                             'pci_exception': e.format_message()})
                # Note(yjiang5): remove the device by force so that
                # db entry is cleaned in next sync.
                existed.status = 'removed'
            else:
                # Note(yjiang5): no need to update stats if an assigned
                # device is hot removed.
                self.stats.remove_device(existed)
        else:
            new_value = next((dev for dev in devices if
                              dev['address'] == existed['address']))
            new_value['compute_node_id'] = self.node_id
            if existed['status'] in ('claimed', 'allocated'):
                # Pci properties may change while assigned because of
                # hotplug or config changes. Although normally this should
                # not happen.
                # As the devices have been assigned to a instance, we defer
                # the change till the instance is destroyed. We will
                # not sync the new properties with database before that.
                # TODO(yjiang5): Not sure if this is a right policy, but
                # at least it avoids some confusion and, if needed,
                # we can add more action like killing the instance
                # by force in future.
                self.stale[new_value['address']] = new_value
            else:
                device.update_device(existed, new_value)

    for dev in [dev for dev in devices if
                dev['address'] in new_addrs - exist_addrs]:
        dev['compute_node_id'] = self.node_id
        dev_obj = objects.PciDevice.create(dev)
        self.pci_devs.append(dev_obj)
        self.stats.add_device(dev_obj)
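# Small set-arithmetic sketch of the sync above, with made-up PCI addresses:
# addresses that disappeared from the hypervisor are removed from the
# tracker, and newly reported ones are added.
exist_addrs = {'0000:00:01.0', '0000:00:02.0'}
new_addrs = {'0000:00:02.0', '0000:00:03.0'}
print(exist_addrs - new_addrs)  # {'0000:00:01.0'} -> removed from tracker
print(new_addrs - exist_addrs)  # {'0000:00:03.0'} -> newly tracked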
def _log_missing_plugins(self, names):
    for name in names:
        if name not in self._mgr.names():
            LOG.warning(_LW('Compute resource plugin %s was not loaded'),
                        name)
def _get_os_obj(self, os_name):
    try:
        return _OsInfoDatabase.get_instance().get_os(os_name)
    except exception.NovaException as e:
        LOG.warning(_LW("Cannot find OS information - Reason: (%s)"), e)
def get_config(self, connection_info, disk_info):
    """Returns xml for libvirt."""
    conf = vconfig.LibvirtConfigGuestDisk()
    conf.driver_name = libvirt_utils.pick_disk_driver_name(
        self.connection._host.get_version(), self.is_block_dev)

    conf.source_device = disk_info['type']
    conf.driver_format = "raw"
    conf.driver_cache = "none"
    conf.target_dev = disk_info['dev']
    conf.target_bus = disk_info['bus']
    conf.serial = connection_info.get('serial')

    # Support for block size tuning
    data = {}
    if 'data' in connection_info:
        data = connection_info['data']
    if 'logical_block_size' in data:
        conf.logical_block_size = data['logical_block_size']
    if 'physical_block_size' in data:
        conf.physical_block_size = data['physical_block_size']

    # Extract rate_limit control parameters
    if 'qos_specs' in data and data['qos_specs']:
        tune_opts = ['total_bytes_sec', 'read_bytes_sec',
                     'write_bytes_sec', 'total_iops_sec',
                     'read_iops_sec', 'write_iops_sec']
        specs = data['qos_specs']
        if isinstance(specs, dict):
            for k, v in six.iteritems(specs):
                if k in tune_opts:
                    new_key = 'disk_' + k
                    setattr(conf, new_key, v)
        else:
            LOG.warning(_LW('Unknown content in connection_info/'
                            'qos_specs: %s'), specs)

    # Extract access_mode control parameters
    if 'access_mode' in data and data['access_mode']:
        access_mode = data['access_mode']
        if access_mode in ('ro', 'rw'):
            conf.readonly = access_mode == 'ro'
        else:
            LOG.error(_LE('Unknown content in '
                          'connection_info/access_mode: %s'),
                      access_mode)
            raise exception.InvalidVolumeAccessMode(
                access_mode=access_mode)

    # Configure usage of discard
    if data.get('discard', False) is True:
        min_qemu = nova.virt.libvirt.driver.MIN_QEMU_DISCARD_VERSION
        min_libvirt = nova.virt.libvirt.driver.MIN_LIBVIRT_DISCARD_VERSION
        if self.connection._host.has_min_version(min_libvirt,
                                                 min_qemu,
                                                 host.HV_DRIVER_QEMU):
            conf.driver_discard = 'unmap'
        else:
            global SHOULD_LOG_DISCARD_WARNING
            if SHOULD_LOG_DISCARD_WARNING:
                SHOULD_LOG_DISCARD_WARNING = False
                LOG.warning(_LW('Unable to attach %(type)s volume '
                                '%(serial)s with discard enabled: qemu '
                                '%(qemu)s and libvirt %(libvirt)s or '
                                'later are required.'),
                            {'qemu': min_qemu,
                             'libvirt': min_libvirt,
                             'serial': conf.serial,
                             'type': connection_info['driver_volume_type']})

    return conf
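# Illustrative shape of the connection_info consumed above. All values are
# made up; only the keys matter, and they mirror the checks in get_config
# (block size tuning, qos_specs mapped to disk_* attributes, access_mode,
# discard).
EXAMPLE_CONNECTION_INFO = {
    'driver_volume_type': 'iscsi',
    'serial': 'example-volume-serial',
    'data': {
        'logical_block_size': '4096',
        'physical_block_size': '4096',
        'qos_specs': {'total_bytes_sec': '102400',
                      'read_iops_sec': '200'},
        'access_mode': 'rw',
        'discard': True,
    },
}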
def _add_floating_ip(self, req, id, body):
    """Associate floating_ip to an instance."""
    context = req.environ['nova.context']
    authorize(context)

    try:
        address = body['addFloatingIp']['address']
    except TypeError:
        msg = _("Missing parameter dict")
        raise webob.exc.HTTPBadRequest(explanation=msg)
    except KeyError:
        msg = _("Address not specified")
        raise webob.exc.HTTPBadRequest(explanation=msg)

    instance = common.get_instance(self.compute_api, context, id)
    cached_nwinfo = compute_utils.get_nw_info_for_instance(instance)
    if not cached_nwinfo:
        msg = _('No nw_info cache associated with instance')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    fixed_ips = cached_nwinfo.fixed_ips()
    if not fixed_ips:
        msg = _('No fixed ips associated to instance')
        raise webob.exc.HTTPBadRequest(explanation=msg)

    fixed_address = None
    if self.ext_mgr.is_loaded('os-extended-floating-ips'):
        if 'fixed_address' in body['addFloatingIp']:
            fixed_address = body['addFloatingIp']['fixed_address']
            for fixed in fixed_ips:
                if fixed['address'] == fixed_address:
                    break
            else:
                msg = _('Specified fixed address not assigned to instance')
                raise webob.exc.HTTPBadRequest(explanation=msg)

    if not fixed_address:
        fixed_address = fixed_ips[0]['address']
        if len(fixed_ips) > 1:
            LOG.warn(_LW('multiple fixed_ips exist, using the first: '
                         '%s'), fixed_address)

    try:
        self.network_api.associate_floating_ip(context, instance,
                                               floating_address=address,
                                               fixed_address=fixed_address)
    except exception.FloatingIpAssociated:
        msg = _('floating ip is already associated')
        raise webob.exc.HTTPBadRequest(explanation=msg)
    except exception.NoFloatingIpInterface:
        msg = _('l3driver call to add floating ip failed')
        raise webob.exc.HTTPBadRequest(explanation=msg)
    except exception.FloatingIpNotFoundForAddress:
        msg = _('floating ip not found')
        raise webob.exc.HTTPNotFound(explanation=msg)
    except exception.Forbidden as e:
        raise webob.exc.HTTPForbidden(explanation=e.format_message())
    except Exception:
        msg = _('Error. Unable to associate floating ip')
        LOG.exception(msg)
        raise webob.exc.HTTPBadRequest(explanation=msg)

    return webob.Response(status_int=202)
def __init__(self, virtapi, scheme="https"):
    super(VMwareVCDriver, self).__init__(virtapi)

    if (CONF.vmware.host_ip is None or
            CONF.vmware.host_username is None or
            CONF.vmware.host_password is None):
        raise Exception(_("Must specify host_ip, host_username and "
                          "host_password to use vmwareapi.VMwareVCDriver"))

    self._datastore_regex = None
    if CONF.vmware.datastore_regex:
        try:
            self._datastore_regex = re.compile(
                CONF.vmware.datastore_regex)
        except re.error:
            raise exception.InvalidInput(
                reason=_("Invalid Regular Expression %s")
                % CONF.vmware.datastore_regex)

    self._session = VMwareAPISession(scheme=scheme)

    self._check_min_version()

    # Update the PBM location if necessary
    if CONF.vmware.pbm_enabled:
        self._update_pbm_location()

    self._validate_configuration()

    # Get the list of clusters to be used
    self._cluster_names = CONF.vmware.cluster_name
    if len(self._cluster_names) > 1:
        versionutils.report_deprecated_feature(
            LOG,
            _LW('The "cluster_name" setting should have only one '
                'cluster name. The capability of allowing '
                'multiple clusters may be dropped in the '
                'Liberty release.'))

    self.dict_mors = vm_util.get_all_cluster_refs_by_name(
        self._session, self._cluster_names)
    if not self.dict_mors:
        raise exception.NotFound(_("All clusters specified %s were not"
                                   " found in the vCenter")
                                 % self._cluster_names)

    # Check if there are any clusters that were specified in the nova.conf
    # but are not in the vCenter, for missing clusters log a warning.
    clusters_found = [v.get('name') for k, v in
                      six.iteritems(self.dict_mors)]
    missing_clusters = set(self._cluster_names) - set(clusters_found)
    if missing_clusters:
        LOG.warning(_LW("The following clusters could not be found in the "
                        "vCenter %s"), list(missing_clusters))

    self._vcenter_uuid = self._get_vcenter_uuid()
    # The _resources is used to maintain the vmops, volumeops and vcstate
    # objects per cluster
    self._resources = {}
    self._resource_keys = set()
    self._virtapi = virtapi
    self._update_resources()

    # The following initialization is necessary since the base class does
    # not use VC state.
    first_cluster = self._resources.keys()[0]
    self._vmops = self._resources.get(first_cluster).get('vmops')
    self._volumeops = self._resources.get(first_cluster).get('volumeops')
    self._vc_state = self._resources.get(first_cluster).get('vcstate')

    # Register the OpenStack extension
    self._register_openstack_extension()
def attach(self, context, instance, volume_api, virt_driver,
           do_check_attach=True, do_driver_attach=False, **kwargs):
    volume = volume_api.get(context, self.volume_id)
    if do_check_attach:
        volume_api.check_attach(context, volume, instance=instance)

    volume_id = volume['id']
    context = context.elevated()

    connector = virt_driver.get_volume_connector(instance)
    connection_info = volume_api.initialize_connection(context,
                                                       volume_id,
                                                       connector)
    if 'serial' not in connection_info:
        connection_info['serial'] = self.volume_id
    self._preserve_multipath_id(connection_info)

    # If do_driver_attach is False, we will attach a volume to an instance
    # at boot time. So actual attach is done by instance creation code.
    if do_driver_attach:
        encryption = encryptors.get_encryption_metadata(
            context, volume_api, volume_id, connection_info)

        try:
            virt_driver.attach_volume(
                context, connection_info, instance,
                self['mount_device'], disk_bus=self['disk_bus'],
                device_type=self['device_type'], encryption=encryption)
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.exception(_LE("Driver failed to attach volume "
                                  "%(volume_id)s at %(mountpoint)s"),
                              {'volume_id': volume_id,
                               'mountpoint': self['mount_device']},
                              instance=instance)
                volume_api.terminate_connection(context, volume_id,
                                                connector)
    self['connection_info'] = connection_info
    if self.volume_size is None:
        self.volume_size = volume.get('size')

    mode = 'rw'
    if 'data' in connection_info:
        mode = connection_info['data'].get('access_mode', 'rw')
    if volume['attach_status'] == "detached":
        # NOTE(mriedem): save our current state so connection_info is in
        # the database before the volume status goes to 'in-use' because
        # after that we can detach and connection_info is required for
        # detach.
        self.save()
        try:
            volume_api.attach(context, volume_id, instance.uuid,
                              self['mount_device'], mode=mode)
        except Exception:
            with excutils.save_and_reraise_exception():
                if do_driver_attach:
                    try:
                        virt_driver.detach_volume(connection_info,
                                                  instance,
                                                  self['mount_device'],
                                                  encryption=encryption)
                    except Exception:
                        LOG.warning(_LW(
                            "Driver failed to detach volume "
                            "%(volume_id)s at %(mount_point)s."),
                            {'volume_id': volume_id,
                             'mount_point': self['mount_device']},
                            exc_info=True, instance=instance)
                volume_api.terminate_connection(context, volume_id,
                                                connector)

                # Cinder-volume might have completed volume attach. So
                # we should detach the volume. If the attach did not
                # happen, the detach request will be ignored.
                volume_api.detach(context, volume_id)
def _handle_instance_id_request(self, req):
    instance_id = req.headers.get('X-Instance-ID')
    tenant_id = req.headers.get('X-Tenant-ID')
    signature = req.headers.get('X-Instance-ID-Signature')
    remote_address = req.headers.get('X-Forwarded-For')

    # Ensure that only one header was passed
    if instance_id is None:
        msg = _('X-Instance-ID header is missing from request.')
    elif tenant_id is None:
        msg = _('X-Tenant-ID header is missing from request.')
    elif not isinstance(instance_id, six.string_types):
        msg = _('Multiple X-Instance-ID headers found within request.')
    elif not isinstance(tenant_id, six.string_types):
        msg = _('Multiple X-Tenant-ID headers found within request.')
    else:
        msg = None

    if msg:
        raise webob.exc.HTTPBadRequest(explanation=msg)

    expected_signature = hmac.new(
        CONF.neutron_metadata_proxy_shared_secret,
        instance_id,
        hashlib.sha256).hexdigest()

    if not utils.constant_time_compare(expected_signature, signature):
        if instance_id:
            LOG.warn(_LW('X-Instance-ID-Signature: %(signature)s does '
                         'not match the expected value: '
                         '%(expected_signature)s for id: %(instance_id)s.'
                         ' Request From: %(remote_address)s'),
                     {'signature': signature,
                      'expected_signature': expected_signature,
                      'instance_id': instance_id,
                      'remote_address': remote_address})
        msg = _('Invalid proxy request signature.')
        raise webob.exc.HTTPForbidden(explanation=msg)

    try:
        meta_data = self.get_metadata_by_instance_id(instance_id,
                                                     remote_address)
    except Exception:
        LOG.exception(_('Failed to get metadata for instance id: %s'),
                      instance_id)
        msg = _('An unknown error has occurred. '
                'Please try your request again.')
        raise webob.exc.HTTPInternalServerError(explanation=unicode(msg))

    if meta_data is None:
        LOG.error(_LE('Failed to get metadata for instance id: %s'),
                  instance_id)
    elif meta_data.instance['project_id'] != tenant_id:
        LOG.warn(_LW("Tenant_id %(tenant_id)s does not match tenant_id "
                     "of instance %(instance_id)s."),
                 {'tenant_id': tenant_id, 'instance_id': instance_id})
        # causes a 404 to be raised
        meta_data = None

    return meta_data
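# Standalone sketch of how a metadata proxy would compute the
# X-Instance-ID-Signature header that the check above verifies. The shared
# secret and instance id below are made-up placeholder values.
import hashlib
import hmac

shared_secret = b'example-shared-secret'
instance_id = b'8c9f30cc-0000-0000-0000-000000000000'
signature = hmac.new(shared_secret, instance_id, hashlib.sha256).hexdigest()
print(signature)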
def _handle_base_image(self, img_id, base_file):
    """Handle the checks for a single base image."""

    image_bad = False
    image_in_use = False

    LOG.info(_LI('image %(id)s at (%(base_file)s): checking'),
             {'id': img_id, 'base_file': base_file})

    if base_file in self.unexplained_images:
        self.unexplained_images.remove(base_file)

    if (base_file and os.path.exists(base_file)
            and os.path.isfile(base_file)):
        # _verify_checksum returns True if the checksum is ok, and None if
        # there is no checksum file
        checksum_result = self._verify_checksum(img_id, base_file)
        if checksum_result is not None:
            image_bad = not checksum_result

        # Give other threads a chance to run
        time.sleep(0)

    instances = []
    if img_id in self.used_images:
        local, remote, instances = self.used_images[img_id]

        if local > 0 or remote > 0:
            image_in_use = True
            LOG.info(_LI('image %(id)s at (%(base_file)s): '
                         'in use: on this node %(local)d local, '
                         '%(remote)d on other nodes sharing this instance '
                         'storage'),
                     {'id': img_id, 'base_file': base_file,
                      'local': local, 'remote': remote})

            self.active_base_files.append(base_file)

            if not base_file:
                LOG.warn(_LW('image %(id)s at (%(base_file)s): warning '
                             '-- an absent base file is in use! '
                             'instances: %(instance_list)s'),
                         {'id': img_id, 'base_file': base_file,
                          'instance_list': ' '.join(instances)})

    if image_bad:
        self.corrupt_base_files.append(base_file)

    if base_file:
        if not image_in_use:
            LOG.debug('image %(id)s at (%(base_file)s): image is not in '
                      'use', {'id': img_id, 'base_file': base_file})
            self.removable_base_files.append(base_file)
        else:
            LOG.debug('image %(id)s at (%(base_file)s): image is in '
                      'use', {'id': img_id, 'base_file': base_file})
            if os.path.exists(base_file):
                libvirt_utils.chown(base_file, os.getuid())
                os.utime(base_file, None)
def __init__(self): super(VMRCConsole, self).__init__() LOG.warning(_LW('The ESX driver has been removed! ' 'This code will be removed in Kilo release!'))
def _check_capacity_exceeded(conn, allocs):
    """Checks to see if the supplied allocation records would result in any of
    the inventories involved having their capacity exceeded.

    Raises an InvalidAllocationCapacityExceeded exception if any inventory
    would be exhausted by the allocation. If no inventories would be exceeded
    by the allocation, the function returns a list of `ResourceProvider`
    objects that contain the generation at the time of the check.

    :param conn: SQLalchemy Connection object to use
    :param allocs: List of `Allocation` objects to check
    """
    # The SQL generated below looks like this:
    # SELECT
    #   rp.id,
    #   rp.uuid,
    #   rp.generation,
    #   inv.resource_class_id,
    #   inv.total,
    #   inv.reserved,
    #   inv.allocation_ratio,
    #   allocs.used
    # FROM resource_providers AS rp
    # JOIN inventories AS inv
    #  ON rp.id = inv.resource_provider_id
    # LEFT JOIN (
    #   SELECT resource_provider_id, resource_class_id, SUM(used) AS used
    #   FROM allocations
    #   WHERE resource_class_id IN ($RESOURCE_CLASSES)
    #   GROUP BY resource_provider_id, resource_class_id
    # ) AS allocs
    #  ON inv.resource_provider_id = allocs.resource_provider_id
    #  AND inv.resource_class_id = allocs.resource_class_id
    # WHERE rp.uuid IN ($RESOURCE_PROVIDERS)
    #  AND inv.resource_class_id IN ($RESOURCE_CLASSES)
    #
    # We then take the results of the above and determine if any of the
    # inventory will have its capacity exceeded.
    res_classes = set([fields.ResourceClass.index(a.resource_class)
                       for a in allocs])
    provider_uuids = set([a.resource_provider.uuid for a in allocs])

    usage = sa.select([_ALLOC_TBL.c.resource_provider_id,
                       _ALLOC_TBL.c.consumer_id,
                       _ALLOC_TBL.c.resource_class_id,
                       sql.func.sum(_ALLOC_TBL.c.used).label('used')])
    usage = usage.where(_ALLOC_TBL.c.resource_class_id.in_(res_classes))
    usage = usage.group_by(_ALLOC_TBL.c.resource_provider_id,
                           _ALLOC_TBL.c.resource_class_id)
    usage = sa.alias(usage, name='usage')

    inv_join = sql.join(
        _RP_TBL, _INV_TBL,
        sql.and_(_RP_TBL.c.id == _INV_TBL.c.resource_provider_id,
                 _INV_TBL.c.resource_class_id.in_(res_classes)))
    primary_join = sql.outerjoin(
        inv_join, usage,
        sql.and_(
            _INV_TBL.c.resource_provider_id == usage.c.resource_provider_id,
            _INV_TBL.c.resource_class_id == usage.c.resource_class_id))

    cols_in_output = [
        _RP_TBL.c.id.label('resource_provider_id'),
        _RP_TBL.c.uuid,
        _RP_TBL.c.generation,
        _INV_TBL.c.resource_class_id,
        _INV_TBL.c.total,
        _INV_TBL.c.reserved,
        _INV_TBL.c.allocation_ratio,
        usage.c.used,
    ]

    sel = sa.select(cols_in_output).select_from(primary_join)
    sel = sel.where(
        sa.and_(_RP_TBL.c.uuid.in_(provider_uuids),
                _INV_TBL.c.resource_class_id.in_(res_classes)))
    records = conn.execute(sel)
    # Create a map keyed by (rp_uuid, res_class) for the records in the DB
    usage_map = {}
    provs_with_inv = set()
    for record in records:
        map_key = (record['uuid'], record['resource_class_id'])
        if map_key in usage_map:
            raise KeyError("%s already in usage_map, bad query"
                           % str(map_key))
        usage_map[map_key] = record
        provs_with_inv.add(record["uuid"])
    # Ensure that all providers have existing inventory
    missing_provs = provider_uuids - provs_with_inv
    if missing_provs:
        raise exception.InvalidInventory(resource_class=str(res_classes),
                                         resource_provider=missing_provs)

    res_providers = {}
    for alloc in allocs:
        res_class = fields.ResourceClass.index(alloc.resource_class)
        rp_uuid = alloc.resource_provider.uuid
        key = (rp_uuid, res_class)
        usage = usage_map[key]
        amount_needed = alloc.used
        allocation_ratio = usage['allocation_ratio']
        # usage["used"] can be returned as None
        used = usage['used'] or 0
        capacity = (usage['total'] - usage['reserved']) * allocation_ratio
        if capacity < (used + amount_needed):
            LOG.warning(
                _LW("Over capacity for %(rc)s on resource provider %(rp)s. "
                    "Needed: %(needed)s, Used: %(used)s, Capacity: %(cap)s"),
                {'rc': fields.ResourceClass.from_index(res_class),
                 'rp': rp_uuid,
                 'needed': amount_needed,
                 'used': used,
                 'cap': capacity})
            raise exception.InvalidAllocationCapacityExceeded(
                resource_class=fields.ResourceClass.from_index(res_class),
                resource_provider=rp_uuid)
        if rp_uuid not in res_providers:
            rp = ResourceProvider(id=usage['resource_provider_id'],
                                  uuid=rp_uuid,
                                  generation=usage['generation'])
            res_providers[rp_uuid] = rp
    return list(res_providers.values())
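# Stand-alone illustration of the capacity test used above: usable capacity
# is (total - reserved) scaled by the allocation ratio, and an allocation is
# rejected when current usage plus the requested amount exceeds it. The
# numbers below are made up for the example.
def _example_capacity_exceeded(total, reserved, allocation_ratio, used,
                               amount_needed):
    capacity = (total - reserved) * allocation_ratio
    return capacity < (used + amount_needed)


# e.g. 128 total, 8 reserved, ratio 1.5 -> capacity 180;
# 170 used + 16 requested = 186 > 180, so the function above would raise
# InvalidAllocationCapacityExceeded.
# _example_capacity_exceeded(128, 8, 1.5, 170, 16) -> True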
def _do_deprecation_warning(self):
    LOG.warn(_LW('The VMware ESX driver is now deprecated and will be '
                 'removed in the Juno release. The VC driver will remain '
                 'and continue to be supported.'))
def pre_live_migration_on_destination(self, src_mig_data, dest_mig_data): """Perform pre live migration steps for the volume on the target host. This method performs any pre live migration that is needed. Certain volume connectors may need to pass data from the source host to the target. This may be required to determine how volumes connect through the Virtual I/O Servers. This method will be called after the pre_live_migration_on_source method. The data from the pre_live call will be passed in via the mig_data. This method should put its output into the dest_mig_data. :param src_mig_data: The migration data from the source server. :param dest_mig_data: The migration data for the destination server. If the volume connector needs to provide information to the live_migration command, it should be added to this dictionary. """ vios_wraps = self.stg_ftsk.feed mgmt_uuid = mgmt.get_mgmt_partition(self.adapter).uuid # Each mapping should attempt to remove itself from the management # partition. for fabric in self._fabric_names(): npiv_port_maps = self._get_fabric_meta(fabric) # Need to first derive the port mappings that can be passed back # to the source system for the live migration call. This tells # the source system what 'vfc mappings' to pass in on the live # migration command. slots = src_mig_data['npiv_fabric_slots_%s' % fabric] fabric_mapping = pvm_vfcm.build_migration_mappings_for_fabric( vios_wraps, self._fabric_ports(fabric), slots) dest_mig_data['npiv_fabric_mapping_%s' % fabric] = fabric_mapping # Next we need to remove the mappings off the mgmt partition. for npiv_port_map in npiv_port_maps: ls = [ LOG.info, _LI("Removing mgmt NPIV mapping for instance " "%(inst)s for fabric %(fabric)s."), { 'inst': self.instance.name, 'fabric': fabric } ] vios_w, vfc_map = pvm_vfcm.find_vios_for_vfc_wwpns( vios_wraps, npiv_port_map[1].split()) if vios_w is not None: # Add the subtask to remove the mapping from the management # partition. task_wrapper = self.stg_ftsk.wrapper_tasks[vios_w.uuid] task_wrapper.add_functor_subtask( pvm_vfcm.remove_maps, mgmt_uuid, client_adpt=vfc_map.client_adapter, logspec=ls) else: LOG.warn( _LW("No storage connections found between the " "Virtual I/O Servers and FC Fabric " "%(fabric)s. The connection might be removed " "already."), {'fabric': fabric}) # TODO(thorst) Find a better place for this execute. Works for now # as the stg_ftsk is all local. Also won't do anything if there # happen to be no fabric changes. self.stg_ftsk.execute() # Collate all of the individual fabric mappings into a single element. full_map = [] for key, value in dest_mig_data.items(): if key.startswith('npiv_fabric_mapping_'): full_map.extend(value) dest_mig_data['vfc_lpm_mappings'] = full_map
def instance_claim(self, context, instance, limits=None): """Indicate that some resources are needed for an upcoming compute instance build operation. This should be called before the compute node is about to perform an instance build operation that will consume additional resources. :param context: security context :param instance: instance to reserve resources for. :type instance: nova.objects.instance.Instance object :param limits: Dict of oversubscription limits for memory, disk, and CPUs. :returns: A Claim ticket representing the reserved resources. It can be used to revert the resource usage if an error occurs during the instance build. """ if self.disabled: # compute_driver doesn't support resource tracking, just # set the 'host' and node fields and continue the build: self._set_instance_host_and_node(instance) return claims.NopClaim() # sanity checks: if instance.host: LOG.warning(_LW("Host field should not be set on the instance " "until resources have been claimed."), instance=instance) if instance.node: LOG.warning(_LW("Node field should not be set on the instance " "until resources have been claimed."), instance=instance) # get the overhead required to build this instance: overhead = self.driver.estimate_instance_overhead(instance) LOG.debug( "Memory overhead for %(flavor)d MB instance; %(overhead)d " "MB", { 'flavor': instance.flavor.memory_mb, 'overhead': overhead['memory_mb'] }) LOG.debug( "Disk overhead for %(flavor)d GB instance; %(overhead)d " "GB", { 'flavor': instance.flavor.root_gb, 'overhead': overhead.get('disk_gb', 0) }) pci_requests = objects.InstancePCIRequests.get_by_instance_uuid( context, instance.uuid) claim = claims.Claim(context, instance, self, self.compute_node, pci_requests, overhead=overhead, limits=limits) # self._set_instance_host_and_node() will save instance to the DB # so set instance.numa_topology first. We need to make sure # that numa_topology is saved while under COMPUTE_RESOURCE_SEMAPHORE # so that the resource audit knows about any cpus we've pinned. instance_numa_topology = claim.claimed_numa_topology instance.numa_topology = instance_numa_topology self._set_instance_host_and_node(instance) if self.pci_tracker: # NOTE(jaypipes): ComputeNode.pci_device_pools is set below # in _update_usage_from_instance(). self.pci_tracker.claim_instance(context, pci_requests, instance_numa_topology) # Mark resources in-use and update stats self._update_usage_from_instance(context, instance) elevated = context.elevated() # persist changes to the compute node: self._update(elevated) return claim
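# Hedged sketch of the accounting behind the claim above: the driver-reported
# overhead is added to the flavor's memory before comparing against the
# oversubscribed limit. This is a simplified stand-in, not the actual
# claims.Claim test.
def _example_memory_fits(flavor_memory_mb, overhead_mb, used_mb, limit_mb):
    requested = flavor_memory_mb + overhead_mb
    return used_mb + requested <= limit_mb


# e.g. a 2048 MB flavor with 64 MB overhead on a host with 14000 MB already
# claimed and a 16384 MB oversubscription limit:
# _example_memory_fits(2048, 64, 14000, 16384) -> True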
def solve(self, hosts, filter_properties):
    """This method returns a list of tuples - (host, instance_uuid) that are
    returned by the solver. Here the assumption is that all instance_uuids
    have the same requirement as specified in filter_properties.
    """
    host_instance_combinations = []

    num_instances = filter_properties['num_instances']
    num_hosts = len(hosts)

    instance_uuids = filter_properties.get('instance_uuids') or [
        '(unknown_uuid)' + str(i) for i in xrange(num_instances)]

    filter_properties.setdefault('solver_cache', {})
    filter_properties['solver_cache'].update(
        {'cost_matrix': [],
         'constraint_matrix': []})

    cost_matrix = self._get_cost_matrix(hosts, filter_properties)
    cost_matrix = self._adjust_cost_matrix(cost_matrix)
    constraint_matrix = self._get_constraint_matrix(hosts, filter_properties)

    # Create dictionaries mapping temporary host/instance keys to
    # hosts/instance_uuids. These temporary keys are used in the solving
    # process because we need a consistent naming convention for the LP
    # variables.
    host_keys = ['Host' + str(i) for i in xrange(num_hosts)]
    host_key_map = dict(zip(host_keys, hosts))
    instance_num_keys = ['InstanceNum' + str(i)
                         for i in xrange(num_instances + 1)]
    instance_num_key_map = dict(zip(instance_num_keys,
                                    xrange(num_instances + 1)))

    # create the pulp variables
    variable_matrix = [
        [pulp.LpVariable('HI_' + host_key + '_' + instance_num_key,
                         0, 1, constants.LpInteger)
         for instance_num_key in instance_num_keys]
        for host_key in host_keys]

    # create the 'prob' variable to contain the problem data.
    prob = pulp.LpProblem("Host Instance Scheduler Problem",
                          constants.LpMinimize)

    # add cost function to pulp solver
    cost_variables = [variable_matrix[i][j] for i in xrange(num_hosts)
                      for j in xrange(num_instances + 1)]
    cost_coefficients = [cost_matrix[i][j] for i in xrange(num_hosts)
                         for j in xrange(num_instances + 1)]
    prob += (pulp.lpSum([cost_coefficients[i] * cost_variables[i]
                         for i in xrange(len(cost_variables))]), "Sum_Costs")

    # add constraints to pulp solver
    for i in xrange(num_hosts):
        for j in xrange(num_instances + 1):
            if constraint_matrix[i][j] is False:
                prob += (variable_matrix[i][j] == 0,
                         "Cons_Host_%s" % i + "_NumInst_%s" % j)

    # add additional constraints to ensure the problem is valid
    # (1) non-trivial solution: number of all instances == that requested
    prob += (pulp.lpSum([variable_matrix[i][j] * j
                         for i in xrange(num_hosts)
                         for j in xrange(num_instances + 1)]) ==
             num_instances, "NonTrivialCons")
    # (2) valid solution: each host is assigned 1 num-instances value
    for i in xrange(num_hosts):
        prob += (pulp.lpSum([variable_matrix[i][j]
                             for j in xrange(num_instances + 1)]) == 1,
                 "ValidCons_Host_%s" % i)

    # The problem is solved using PuLP's choice of solver.
    prob.solve(pulp_solver_classes.PULP_CBC_CMD(
        maxSeconds=CONF.solver_scheduler.pulp_solver_timeout_seconds))

    # Create host-instance tuples from the solutions.
    if pulp.LpStatus[prob.status] == 'Optimal':
        num_insts_on_host = {}
        for v in prob.variables():
            if v.name.startswith('HI'):
                (host_key, instance_num_key) = (
                    v.name.lstrip('HI').lstrip('_').split('_'))
                if v.varValue == 1:
                    num_insts_on_host[host_key] = (
                        instance_num_key_map[instance_num_key])
        instances_iter = iter(instance_uuids)
        for host_key in host_keys:
            num_insts_on_this_host = num_insts_on_host.get(host_key, 0)
            for i in xrange(num_insts_on_this_host):
                host_instance_combinations.append(
                    (host_key_map[host_key], instances_iter.next()))
    else:
        LOG.warn(_LW("Pulp solver did not find an optimal solution! "
                     "reason: %s"), pulp.LpStatus[prob.status])
        host_instance_combinations = []

    return host_instance_combinations
def unshelve_instance(self, context, instance, request_spec=None): sys_meta = instance.system_metadata def safe_image_show(ctx, image_id): if image_id: return self.image_api.get(ctx, image_id, show_deleted=False) else: raise exception.ImageNotFound(image_id='') if instance.vm_state == vm_states.SHELVED: instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=task_states.UNSHELVING) self.compute_rpcapi.start_instance(context, instance) elif instance.vm_state == vm_states.SHELVED_OFFLOADED: image = None image_id = sys_meta.get('shelved_image_id') # No need to check for image if image_id is None as # "shelved_image_id" key is not set for volume backed # instance during the shelve process if image_id: with compute_utils.EventReporter( context, 'get_image_info', instance.uuid): try: image = safe_image_show(context, image_id) except exception.ImageNotFound: instance.vm_state = vm_states.ERROR instance.save() reason = _('Unshelve attempted but the image %s ' 'cannot be found.') % image_id LOG.error(reason, instance=instance) raise exception.UnshelveException( instance_id=instance.uuid, reason=reason) try: with compute_utils.EventReporter(context, 'schedule_instances', instance.uuid): if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is # old. We need to mock that the old way filter_properties = {} request_spec = scheduler_utils.build_request_spec( context, image, [instance]) else: # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances(), # populate_filter_properties and populate_retry() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.\ to_legacy_request_spec_dict() scheduler_utils.populate_retry(filter_properties, instance.uuid) hosts = self._schedule_instances( context, request_spec, filter_properties) host_state = hosts[0] scheduler_utils.populate_filter_properties( filter_properties, host_state) (host, node) = (host_state['host'], host_state['nodename']) self.compute_rpcapi.unshelve_instance( context, instance, host, image=image, filter_properties=filter_properties, node=node) except (exception.NoValidHost, exception.UnsupportedPolicyException): instance.task_state = None instance.save() LOG.warning(_LW("No valid host found for unshelve instance"), instance=instance) return except Exception: with excutils.save_and_reraise_exception(): instance.task_state = None instance.save() LOG.error(_LE("Unshelve attempted but an error " "has occurred"), instance=instance) else: LOG.error(_LE('Unshelve attempted but vm_state not SHELVED or ' 'SHELVED_OFFLOADED'), instance=instance) instance.vm_state = vm_states.ERROR instance.save() return
def new_websocket_client(self):
    """Called after a new WebSocket connection has been established."""
    # Reopen the eventlet hub to make sure we don't share an epoll
    # fd with parent and/or siblings, which would be bad
    from eventlet import hubs
    hubs.use_hub()

    # The nova expected behavior is to have the token
    # passed to the method GET of the request
    parse = urlparse.urlparse(self.path)
    if parse.scheme not in ('http', 'https'):
        # Due to a bug in urlparse in Python < 2.7.4 we cannot support
        # special schemes (cf: http://bugs.python.org/issue9374)
        if sys.version_info < (2, 7, 4):
            raise exception.NovaException(
                _("We do not support scheme '%s' under Python < 2.7.4, "
                  "please use http or https") % parse.scheme)

    query = parse.query
    token = urlparse.parse_qs(query).get("token", [""]).pop()
    if not token:
        # NoVNC uses its own convention of forwarding the token
        # from the request to a cookie header; we should check
        # for this behavior as well
        hcookie = self.headers.get('cookie')
        if hcookie:
            cookie = Cookie.SimpleCookie()
            for hcookie_part in hcookie.split(';'):
                hcookie_part = hcookie_part.lstrip()
                try:
                    cookie.load(hcookie_part)
                except Cookie.CookieError:
                    # NOTE(stgleb): Do not print out cookie content
                    # for security reasons.
                    LOG.warning(_LW('Found malformed cookie'))
                else:
                    if 'token' in cookie:
                        token = cookie['token'].value

    ctxt = context.get_admin_context()
    rpcapi = consoleauth_rpcapi.ConsoleAuthAPI()
    connect_info = rpcapi.check_token(ctxt, token=token)

    if not connect_info:
        raise exception.InvalidToken(token=token)

    # Verify Origin
    expected_origin_hostname = self.headers.get('Host')
    if ':' in expected_origin_hostname:
        e = expected_origin_hostname
        if '[' in e and ']' in e:
            expected_origin_hostname = e.split(']')[0][1:]
        else:
            expected_origin_hostname = e.split(':')[0]
    expected_origin_hostnames = CONF.console.allowed_origins
    expected_origin_hostnames.append(expected_origin_hostname)
    origin_url = self.headers.get('Origin')
    # missing origin header indicates non-browser client which is OK
    if origin_url is not None:
        origin = urlparse.urlparse(origin_url)
        origin_hostname = origin.hostname
        origin_scheme = origin.scheme
        if origin_hostname == '' or origin_scheme == '':
            detail = _("Origin header not valid.")
            raise exception.ValidationError(detail=detail)
        if origin_hostname not in expected_origin_hostnames:
            detail = _("Origin header does not match this host.")
            raise exception.ValidationError(detail=detail)
        if not self.verify_origin_proto(connect_info, origin_scheme):
            detail = _("Origin header protocol does not match this host.")
            raise exception.ValidationError(detail=detail)

    self.msg(_('connect info: %s'), str(connect_info))
    host = connect_info['host']
    port = int(connect_info['port'])

    # Connect to the target
    self.msg(_("connecting to: %(host)s:%(port)s") % {'host': host,
                                                      'port': port})
    tsock = self.socket(host, port, connect=True)

    # Handshake as necessary
    if connect_info.get('internal_access_path'):
        tsock.send("CONNECT %s HTTP/1.1\r\n\r\n" %
                   connect_info['internal_access_path'])
        end_token = "\r\n\r\n"
        while True:
            data = tsock.recv(4096, socket.MSG_PEEK)
            token_loc = data.find(end_token)
            if token_loc != -1:
                if data.split("\r\n")[0].find("200") == -1:
                    raise exception.InvalidConnectionInfo()
                # remove the response from recv buffer
                tsock.recv(token_loc + len(end_token))
                break

    # Start proxying
    try:
        self.do_proxy(tsock)
    except Exception:
        if tsock:
            tsock.shutdown(socket.SHUT_RDWR)
            tsock.close()
            self.vmsg(_("%(host)s:%(port)s: "
                        "Websocket client or target closed") %
                      {'host': host, 'port': port})
        raise
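# Isolated sketch of the Host-header normalisation done above: strip an
# optional port, handling bracketed IPv6 literals, so the value can be
# compared against the Origin header's hostname. Inputs are illustrative.
def _example_host_header_hostname(host_header):
    if ':' in host_header:
        if '[' in host_header and ']' in host_header:
            # '[::1]:6080' -> '::1'
            return host_header.split(']')[0][1:]
        # 'console.example.com:6080' -> 'console.example.com'
        return host_header.split(':')[0]
    return host_header


# _example_host_header_hostname('[::1]:6080') -> '::1'
# _example_host_header_hostname('console.example.com:6080')
# -> 'console.example.com'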
def _node_resource(self, node): """Helper method to create resource dict from node stats.""" vcpus = int(node.properties.get('cpus', 0)) memory_mb = int(node.properties.get('memory_mb', 0)) local_gb = int(node.properties.get('local_gb', 0)) raw_cpu_arch = node.properties.get('cpu_arch', None) try: cpu_arch = arch.canonicalize(raw_cpu_arch) except exception.InvalidArchitectureName: cpu_arch = None if not cpu_arch: LOG.warning(_LW("cpu_arch not defined for node '%s'"), node.uuid) nodes_extra_specs = {} # NOTE(deva): In Havana and Icehouse, the flavor was required to link # to an arch-specific deploy kernel and ramdisk pair, and so the flavor # also had to have extra_specs['cpu_arch'], which was matched against # the ironic node.properties['cpu_arch']. # With Juno, the deploy image(s) may be referenced directly by the # node.driver_info, and a flavor no longer needs to contain any of # these three extra specs, though the cpu_arch may still be used # in a heterogeneous environment, if so desired. # NOTE(dprince): we use the raw cpu_arch here because extra_specs # filters aren't canonicalized nodes_extra_specs['cpu_arch'] = raw_cpu_arch # NOTE(gilliard): To assist with more precise scheduling, if the # node.properties contains a key 'capabilities', we expect the value # to be of the form "k1:v1,k2:v2,etc.." which we add directly as # key/value pairs into the node_extra_specs to be used by the # ComputeCapabilitiesFilter capabilities = node.properties.get('capabilities') if capabilities: for capability in str(capabilities).split(','): parts = capability.split(':') if len(parts) == 2 and parts[0] and parts[1]: nodes_extra_specs[parts[0]] = parts[1] else: LOG.warning( _LW("Ignoring malformed capability '%s'. " "Format should be 'key:val'."), capability) vcpus_used = 0 memory_mb_used = 0 local_gb_used = 0 if node.instance_uuid: # Node has an instance, report all resource as unavailable vcpus_used = vcpus memory_mb_used = memory_mb local_gb_used = local_gb elif self._node_resources_unavailable(node): # The node's current state is such that it should not present any # of its resources to Nova vcpus = 0 memory_mb = 0 local_gb = 0 dic = { 'node': str(node.uuid), 'hypervisor_hostname': str(node.uuid), 'hypervisor_type': self._get_hypervisor_type(), 'hypervisor_version': self._get_hypervisor_version(), # The Ironic driver manages multiple hosts, so there are # likely many different CPU models in use. As such it is # impossible to provide any meaningful info on the CPU # model of the "host" 'cpu_info': None, 'vcpus': vcpus, 'vcpus_used': vcpus_used, 'local_gb': local_gb, 'local_gb_used': local_gb_used, 'disk_total': local_gb, 'disk_used': local_gb_used, 'disk_available': local_gb - local_gb_used, 'memory_mb': memory_mb, 'memory_mb_used': memory_mb_used, 'host_memory_total': memory_mb, 'host_memory_free': memory_mb - memory_mb_used, 'supported_instances': jsonutils.dumps(_get_nodes_supported_instances(cpu_arch)), 'stats': jsonutils.dumps(nodes_extra_specs), 'host': CONF.host, } dic.update(nodes_extra_specs) return dic
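# Stand-alone version of the capabilities parsing above: a node property of
# the form "k1:v1,k2:v2" becomes extra_specs entries, and malformed parts are
# skipped. The sample string is made up.
def _example_parse_capabilities(capabilities):
    extra_specs = {}
    for capability in str(capabilities).split(','):
        parts = capability.split(':')
        if len(parts) == 2 and parts[0] and parts[1]:
            extra_specs[parts[0]] = parts[1]
    return extra_specs


# _example_parse_capabilities('boot_mode:uefi,raid_level:1')
# -> {'boot_mode': 'uefi', 'raid_level': '1'}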
def map_power_state(state): try: return _POWER_STATE_MAP[state] except KeyError: LOG.warning(_LW("Power state %s not found."), state) return power_state.NOSTATE
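# Equivalent lookup expressed with dict.get(); the try/except form above is
# preferred in the driver because it logs a warning when an unknown state is
# seen. The map below is a truncated illustration with string placeholders,
# not the real _POWER_STATE_MAP or power_state constants.
_EXAMPLE_POWER_STATE_MAP = {
    'power on': 'RUNNING',
    'power off': 'SHUTDOWN',
}


def _example_map_power_state(state):
    return _EXAMPLE_POWER_STATE_MAP.get(state, 'NOSTATE')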
def _add_floating_ip(self, req, id, body): """Associate floating_ip to an instance.""" context = req.environ['nova.context'] authorize(context) try: address = body['addFloatingIp']['address'] except TypeError: msg = _("Missing parameter dict") raise webob.exc.HTTPBadRequest(explanation=msg) except KeyError: msg = _("Address not specified") raise webob.exc.HTTPBadRequest(explanation=msg) instance = common.get_instance(self.compute_api, context, id) cached_nwinfo = compute_utils.get_nw_info_for_instance(instance) if not cached_nwinfo: LOG.warning(_LW('Info cache is %r during associate') % instance.info_cache, instance=instance) msg = _('No nw_info cache associated with instance') raise webob.exc.HTTPBadRequest(explanation=msg) fixed_ips = cached_nwinfo.fixed_ips() if not fixed_ips: msg = _('No fixed IPs associated to instance') raise webob.exc.HTTPBadRequest(explanation=msg) fixed_address = None if self.ext_mgr.is_loaded('os-extended-floating-ips'): if 'fixed_address' in body['addFloatingIp']: fixed_address = body['addFloatingIp']['fixed_address'] for fixed in fixed_ips: if fixed['address'] == fixed_address: break else: msg = _('Specified fixed address not assigned to instance') raise webob.exc.HTTPBadRequest(explanation=msg) if not fixed_address: try: fixed_address = next(ip['address'] for ip in fixed_ips if netaddr.valid_ipv4(ip['address'])) except StopIteration: msg = _('Unable to associate floating IP %(address)s ' 'to any fixed IPs for instance %(id)s. ' 'Instance has no fixed IPv4 addresses to ' 'associate.') % ({ 'address': address, 'id': id }) raise webob.exc.HTTPBadRequest(explanation=msg) if len(fixed_ips) > 1: LOG.warning( _LW('multiple fixed_ips exist, using the first ' 'IPv4 fixed_ip: %s'), fixed_address) try: self.network_api.associate_floating_ip(context, instance, floating_address=address, fixed_address=fixed_address) except exception.FloatingIpAssociated: msg = _('floating IP is already associated') raise webob.exc.HTTPBadRequest(explanation=msg) except exception.NoFloatingIpInterface: msg = _('l3driver call to add floating IP failed') raise webob.exc.HTTPBadRequest(explanation=msg) except exception.FloatingIpNotFoundForAddress: msg = _('floating IP not found') raise webob.exc.HTTPNotFound(explanation=msg) except exception.Forbidden as e: raise webob.exc.HTTPForbidden(explanation=e.format_message()) except Exception as e: msg = _('Unable to associate floating IP %(address)s to ' 'fixed IP %(fixed_address)s for instance %(id)s. ' 'Error: %(error)s') % ({ 'address': address, 'fixed_address': fixed_address, 'id': id, 'error': e }) LOG.exception(msg) raise webob.exc.HTTPBadRequest(explanation=msg) return webob.Response(status_int=202)
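# Small sketch (assumes the 'netaddr' package) of the fixed-address selection
# above: pick the first IPv4 address from the instance's fixed IPs; next()
# raises StopIteration when none exists, which the handler turns into a 400.
# The addresses are examples.
import netaddr


def _example_first_ipv4(fixed_ips):
    return next(ip['address'] for ip in fixed_ips
                if netaddr.valid_ipv4(ip['address']))


# _example_first_ipv4([{'address': 'fd00::5'}, {'address': '10.0.0.5'}])
# -> '10.0.0.5'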
def _update_from_compute_node(self, compute): """Update information about a host from a ComputeNode object.""" if (self.updated and compute.updated_at and self.updated > compute.updated_at): return all_ram_mb = compute.memory_mb # Assume virtual size is all consumed by instances if use qcow2 disk. free_gb = compute.free_disk_gb least_gb = compute.disk_available_least if least_gb is not None: if least_gb > free_gb: # can occur when an instance in database is not on host LOG.warning( _LW("Host %(hostname)s has more disk space than " "database expected " "(%(physical)s GB > %(database)s GB)"), { 'physical': least_gb, 'database': free_gb, 'hostname': compute.hypervisor_hostname }) free_gb = min(least_gb, free_gb) free_disk_mb = free_gb * 1024 self.disk_mb_used = compute.local_gb_used * 1024 # NOTE(jogo) free_ram_mb can be negative self.free_ram_mb = compute.free_ram_mb self.total_usable_ram_mb = all_ram_mb self.total_usable_disk_gb = compute.local_gb self.free_disk_mb = free_disk_mb self.vcpus_total = compute.vcpus self.vcpus_used = compute.vcpus_used self.updated = compute.updated_at self.numa_topology = compute.numa_topology self.pci_stats = pci_stats.PciDeviceStats(compute.pci_device_pools) # All virt drivers report host_ip self.host_ip = compute.host_ip self.hypervisor_type = compute.hypervisor_type self.hypervisor_version = compute.hypervisor_version self.hypervisor_hostname = compute.hypervisor_hostname self.cpu_info = compute.cpu_info if compute.supported_hv_specs: self.supported_instances = [ spec.to_list() for spec in compute.supported_hv_specs ] else: self.supported_instances = [] # Don't store stats directly in host_state to make sure these don't # overwrite any values, or get overwritten themselves. Store in self so # filters can schedule with them. self.stats = compute.stats or {} # Track number of instances on host self.num_instances = int(self.stats.get('num_instances', 0)) self.num_io_ops = int(self.stats.get('io_workload', 0)) # update metrics self.metrics = objects.MonitorMetricList.from_json(compute.metrics) # update allocation ratios given by the ComputeNode object self.cpu_allocation_ratio = compute.cpu_allocation_ratio self.ram_allocation_ratio = compute.ram_allocation_ratio self.disk_allocation_ratio = compute.disk_allocation_ratio
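# Worked illustration of the disk reconciliation above: when the hypervisor
# reports disk_available_least, the scheduler trusts the smaller of the two
# values and converts to MB. The numbers are made up.
def _example_free_disk_mb(free_disk_gb, disk_available_least):
    free_gb = free_disk_gb
    if disk_available_least is not None:
        free_gb = min(disk_available_least, free_gb)
    return free_gb * 1024


# _example_free_disk_mb(100, 80) -> 81920 (the conservative value wins)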
def rebuild_instance(self, context, instance, orig_image_ref, image_ref, injected_files, new_pass, orig_sys_metadata, bdms, recreate, on_shared_storage, preserve_ephemeral=False, host=None): with compute_utils.EventReporter(context, 'rebuild_server', instance.uuid): if not host: # NOTE(lcostantino): Retrieve scheduler filters for the # instance when the feature is available filter_properties = {'ignore_hosts': [instance.host]} request_spec = scheduler_utils.build_request_spec( context, image_ref, [instance]) try: scheduler_utils.setup_instance_group( context, request_spec, filter_properties) hosts = self.scheduler_client.select_destinations( context, request_spec, filter_properties) host = hosts.pop(0)['host'] except exception.NoValidHost as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify( context, instance.uuid, 'rebuild_server', { 'vm_state': instance.vm_state, 'task_state': None }, ex, request_spec) LOG.warning(_LW("No valid host found for rebuild"), instance=instance) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify( context, instance.uuid, 'rebuild_server', { 'vm_state': instance.vm_state, 'task_state': None }, ex, request_spec) LOG.warning(_LW("Server with unsupported policy " "cannot be rebuilt"), instance=instance) compute_utils.notify_about_instance_usage(self.notifier, context, instance, "rebuild.scheduled") self.compute_rpcapi.rebuild_instance( context, instance=instance, new_pass=new_pass, injected_files=injected_files, image_ref=image_ref, orig_image_ref=orig_image_ref, orig_sys_metadata=orig_sys_metadata, bdms=bdms, recreate=recreate, on_shared_storage=on_shared_storage, preserve_ephemeral=preserve_ephemeral, host=host)
def update_from_compute_node(self, compute): """Update information about a host from its compute_node info.""" if (self.updated and compute['updated_at'] and self.updated > compute['updated_at']): return all_ram_mb = compute['memory_mb'] # Assume virtual size is all consumed by instances if use qcow2 disk. free_gb = compute['free_disk_gb'] least_gb = compute.get('disk_available_least') if least_gb is not None: if least_gb > free_gb: # can occur when an instance in database is not on host LOG.warning(_LW("Host %(hostname)s has more disk space than " "database expected " "(%(physical)sgb > %(database)sgb)"), {'physical': least_gb, 'database': free_gb, 'hostname': compute['hypervisor_hostname']}) free_gb = min(least_gb, free_gb) free_disk_mb = free_gb * 1024 self.disk_mb_used = compute['local_gb_used'] * 1024 # NOTE(jogo) free_ram_mb can be negative self.all_freq = compute.get('all_freq') self.curr_freq = compute['curr_freq'] self.max_freq = compute['max_freq'] self.free_ram_mb = compute['free_ram_mb'] self.total_usable_ram_mb = all_ram_mb self.total_usable_disk_gb = compute['local_gb'] self.free_disk_mb = free_disk_mb self.vcpus_total = compute['vcpus'] self.vcpus_used = compute['vcpus_used'] self.updated = compute['updated_at'] self.numa_topology = compute['numa_topology'] if 'pci_stats' in compute: self.pci_stats = pci_stats.PciDeviceStats(compute['pci_stats']) else: self.pci_stats = None # All virt drivers report host_ip self.host_ip = compute['host_ip'] self.hypervisor_type = compute.get('hypervisor_type') self.hypervisor_version = compute.get('hypervisor_version') self.hypervisor_hostname = compute.get('hypervisor_hostname') self.cpu_info = compute.get('cpu_info') if compute.get('supported_instances'): self.supported_instances = jsonutils.loads( compute.get('supported_instances')) # Don't store stats directly in host_state to make sure these don't # overwrite any values, or get overwritten themselves. Store in self so # filters can schedule with them. stats = compute.get('stats', None) or '{}' self.stats = jsonutils.loads(stats) # Track number of instances on host self.num_instances = int(self.stats.get('num_instances', 0)) self.num_io_ops = int(self.stats.get('io_workload', 0)) # update metrics self._update_metrics_from_compute_node(compute)
def snapshot(self, context, instance, image_id, update_task_state): """Create snapshot from a running VM instance.""" instance_name = instance.name LOG.debug("Creating snapshot for instance %s", instance_name) snapshot_path = self._vmutils.take_vm_snapshot(instance_name) update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD) export_dir = None try: src_vhd_path = self._pathutils.lookup_root_vhd_path(instance_name) LOG.debug("Getting info for VHD %s", src_vhd_path) src_base_disk_path = self._vhdutils.get_vhd_parent_path( src_vhd_path) export_dir = self._pathutils.get_export_dir(instance_name) dest_vhd_path = os.path.join(export_dir, os.path.basename(src_vhd_path)) LOG.debug('Copying VHD %(src_vhd_path)s to %(dest_vhd_path)s', { 'src_vhd_path': src_vhd_path, 'dest_vhd_path': dest_vhd_path }) self._pathutils.copyfile(src_vhd_path, dest_vhd_path) image_vhd_path = None if not src_base_disk_path: image_vhd_path = dest_vhd_path else: basename = os.path.basename(src_base_disk_path) dest_base_disk_path = os.path.join(export_dir, basename) LOG.debug( 'Copying base disk %(src_vhd_path)s to ' '%(dest_base_disk_path)s', { 'src_vhd_path': src_vhd_path, 'dest_base_disk_path': dest_base_disk_path }) self._pathutils.copyfile(src_base_disk_path, dest_base_disk_path) LOG.debug( "Reconnecting copied base VHD " "%(dest_base_disk_path)s and diff " "VHD %(dest_vhd_path)s", { 'dest_base_disk_path': dest_base_disk_path, 'dest_vhd_path': dest_vhd_path }) self._vhdutils.reconnect_parent_vhd(dest_vhd_path, dest_base_disk_path) LOG.debug( "Merging base disk %(dest_base_disk_path)s and " "diff disk %(dest_vhd_path)s", { 'dest_base_disk_path': dest_base_disk_path, 'dest_vhd_path': dest_vhd_path }) self._vhdutils.merge_vhd(dest_vhd_path, dest_base_disk_path) image_vhd_path = dest_base_disk_path LOG.debug( "Updating Glance image %(image_id)s with content from " "merged disk %(image_vhd_path)s", { 'image_id': image_id, 'image_vhd_path': image_vhd_path }) update_task_state(task_state=task_states.IMAGE_UPLOADING, expected_state=task_states.IMAGE_PENDING_UPLOAD) self._save_glance_image(context, image_id, image_vhd_path) LOG.debug( "Snapshot image %(image_id)s updated for VM " "%(instance_name)s", { 'image_id': image_id, 'instance_name': instance_name }) finally: try: LOG.debug("Removing snapshot %s", image_id) self._vmutils.remove_vm_snapshot(snapshot_path) except Exception as ex: LOG.exception(ex) LOG.warning(_LW('Failed to remove snapshot for VM %s'), instance_name) if export_dir: LOG.debug('Removing directory: %s', export_dir) self._pathutils.rmtree(export_dir)
def rebuild_instance(self, context, instance, orig_image_ref, image_ref, injected_files, new_pass, orig_sys_metadata, bdms, recreate, on_shared_storage, preserve_ephemeral=False, host=None, request_spec=None): with compute_utils.EventReporter(context, 'rebuild_server', instance.uuid): node = limits = None if not host: if not request_spec: # NOTE(sbauza): We were unable to find an original # RequestSpec object - probably because the instance is old # We need to mock that the old way filter_properties = {'ignore_hosts': [instance.host]} request_spec = scheduler_utils.build_request_spec( context, image_ref, [instance]) else: # NOTE(sbauza): Augment the RequestSpec object by excluding # the source host for avoiding the scheduler to pick it request_spec.ignore_hosts = request_spec.ignore_hosts or [] request_spec.ignore_hosts.append(instance.host) # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host request_spec.reset_forced_destinations() # TODO(sbauza): Provide directly the RequestSpec object # when _schedule_instances() and _set_vm_state_and_notify() # accept it filter_properties = request_spec.\ to_legacy_filter_properties_dict() request_spec = request_spec.to_legacy_request_spec_dict() try: hosts = self._schedule_instances( context, request_spec, filter_properties) host_dict = hosts.pop(0) host, node, limits = (host_dict['host'], host_dict['nodename'], host_dict['limits']) except exception.NoValidHost as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, 'rebuild_server', {'vm_state': instance.vm_state, 'task_state': None}, ex, request_spec) LOG.warning(_LW("No valid host found for rebuild"), instance=instance) except exception.UnsupportedPolicyException as ex: with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, 'rebuild_server', {'vm_state': instance.vm_state, 'task_state': None}, ex, request_spec) LOG.warning(_LW("Server with unsupported policy " "cannot be rebuilt"), instance=instance) try: migration = objects.Migration.get_by_instance_and_status( context, instance.uuid, 'accepted') except exception.MigrationNotFoundByStatus: LOG.debug("No migration record for the rebuild/evacuate " "request.", instance=instance) migration = None compute_utils.notify_about_instance_usage( self.notifier, context, instance, "rebuild.scheduled") self.compute_rpcapi.rebuild_instance(context, instance=instance, new_pass=new_pass, injected_files=injected_files, image_ref=image_ref, orig_image_ref=orig_image_ref, orig_sys_metadata=orig_sys_metadata, bdms=bdms, recreate=recreate, on_shared_storage=on_shared_storage, preserve_ephemeral=preserve_ephemeral, migration=migration, host=host, node=node, limits=limits)
def __init__(self): super(VMRCSessionConsole, self).__init__() LOG.warning(_LW('This code will be removed in Kilo release!'))