def _log_and_attach(bdm):
    # Log which kind of block device is about to be attached, then
    # delegate to the BDM object's attach() with the captured arguments.
    ctxt = attach_args[0]
    inst = attach_args[1]
    mountpoint = bdm['mount_device']
    if bdm.get('volume_id'):
        msg = _LI('Booting with volume %(volume_id)s at %(mountpoint)s')
        payload = {'volume_id': bdm.volume_id, 'mountpoint': mountpoint}
    elif bdm.get('snapshot_id'):
        msg = _LI('Booting with volume snapshot %(snapshot_id)s at '
                  '%(mountpoint)s')
        payload = {'snapshot_id': bdm.snapshot_id, 'mountpoint': mountpoint}
    elif bdm.get('image_id'):
        msg = _LI('Booting with volume-backed-image %(image_id)s at '
                  '%(mountpoint)s')
        payload = {'image_id': bdm.image_id, 'mountpoint': mountpoint}
    else:
        msg = _LI('Booting with blank volume at %(mountpoint)s')
        payload = {'mountpoint': mountpoint}
    LOG.info(msg, payload, context=ctxt, instance=inst)
    bdm.attach(*attach_args, **attach_kwargs)
def __exit__(self, ex_type, ex_value, ex_traceback):
    """Translate known exceptions raised in the managed block into Faults.

    Returns True (suppress) when there was no exception, False (re-raise)
    when the exception type is not recognized. The order of the checks is
    significant and is preserved from the original dispatch chain.
    """
    if not ex_value:
        return True

    if isinstance(ex_value, exception.Forbidden):
        # Authorization failures become HTTP 403.
        raise Fault(webob.exc.HTTPForbidden(
            explanation=ex_value.format_message()))
    if isinstance(ex_value, exception.VersionNotFoundForAPIMethod):
        # Handled further up by the API-version dispatch machinery.
        raise
    if isinstance(ex_value, exception.Invalid):
        # Invalid carries its own HTTP code; preserve it.
        raise Fault(exception.ConvertedException(
            code=ex_value.code,
            explanation=ex_value.format_message()))
    if isinstance(ex_value, TypeError):
        # Log the full traceback and answer with HTTP 400.
        LOG.error(_LE('Exception handling resource: %s'), ex_value,
                  exc_info=(ex_type, ex_value, ex_traceback))
        raise Fault(webob.exc.HTTPBadRequest())
    if isinstance(ex_value, Fault):
        LOG.info(_LI("Fault thrown: %s"), ex_value)
        raise ex_value
    if isinstance(ex_value, webob.exc.HTTPException):
        LOG.info(_LI("HTTP exception thrown: %s"), ex_value)
        raise Fault(ex_value)

    # We didn't handle the exception
    return False
def update_available_resource(self, context):
    """Override in-memory calculations of compute node resource usage based
    on data audited from the hypervisor layer.

    Add in resource claims in progress to account for operations that have
    declared a need for resources, but not necessarily retrieved them from
    the hypervisor layer yet.
    """
    LOG.info(_LI("Auditing locally available compute resources for "
                 "node %(node)s"), {'node': self.nodename})

    resources = self.driver.get_available_resource(self.nodename)
    if not resources:
        # The virt driver does not support this function
        LOG.info(_LI("Virt driver does not support "
                     "'get_available_resource'. Compute tracking is "
                     "disabled."))
        self.compute_node = None
        return

    resources['host_ip'] = CONF.my_ip

    # We want the 'cpu_info' to be None from the POV of the
    # virt driver, but the DB requires it to be non-null so
    # just force it to empty string
    if resources.get("cpu_info") is None:
        resources["cpu_info"] = ''

    self._verify_resources(resources)
    self._report_hypervisor_resource_view(resources)
    self._update_available_resource(context, resources)
def handshake(self, req, connect_info, sockets):
    """Execute hypervisor-specific vnc auth handshaking (if needed).

    Connects to the VNC server; when an internal access path is given,
    issues an HTTP CONNECT and waits for a "200" status line before
    proceeding. On success the client and server sockets are stored in
    ``sockets``; on handshake failure the method logs and returns early,
    leaving ``sockets`` untouched.
    """
    host = connect_info['host']
    port = int(connect_info['port'])
    server = eventlet.connect((host, port))

    # Handshake as necessary
    if connect_info.get('internal_access_path'):
        server.sendall("CONNECT %s HTTP/1.1\r\n\r\n" %
                       connect_info['internal_access_path'])

        data = ""
        while True:
            b = server.recv(1)
            if b:
                data += b
                if data.find("\r\n\r\n") != -1:
                    # BUG FIX: str.find() returns an offset (-1 when
                    # absent), so the old "not ...find('200')" test only
                    # fired when the status line *started* with "200" and
                    # never flagged a genuinely failed CONNECT. Check for
                    # a 200 status explicitly instead.
                    if "200" not in data.split("\r\n")[0]:
                        LOG.info(_LI("Error in handshake format: %s"),
                                 data)
                        return
                    break
            # Abort on EOF or an implausibly large response header.
            if not b or len(data) > 4096:
                LOG.info(_LI("Error in handshake: %s"), data)
                return

    client = req.environ['eventlet.input'].get_socket()
    client.sendall("HTTP/1.1 200 OK\r\n\r\n")
    sockets['client'] = client
    sockets['server'] = server
def _test(self, type_, unit, total, used, requested, limit):
    """Check whether ``requested`` units of resource ``type_`` fit.

    Returns None when the allocation is safe (or the resource is
    unlimited); otherwise returns a translated failure message.
    """
    LOG.info(_LI('Total %(type)s: %(total)d %(unit)s, used: %(used).02f '
                 '%(unit)s'),
             {'type': type_, 'total': total, 'unit': unit, 'used': used},
             instance=self.instance)

    if limit is None:
        # treat resource as unlimited:
        LOG.info(_LI('%(type)s limit not specified, defaulting to '
                     'unlimited'), {'type': type_}, instance=self.instance)
        return

    headroom = limit - used

    # Oversubscribed resource policy info:
    LOG.info(_LI('%(type)s limit: %(limit).02f %(unit)s, '
                 'free: %(free).02f %(unit)s'),
             {'type': type_, 'limit': limit, 'free': headroom,
              'unit': unit},
             instance=self.instance)

    if requested <= headroom:
        return None

    return (_('Free %(type)s %(free).02f '
              '%(unit)s < requested %(requested)d %(unit)s') %
            {'type': type_, 'free': headroom, 'unit': unit,
             'requested': requested})
def _log_and_attach(bdm):
    # Emit an INFO line describing the source of the boot device, then
    # perform the actual attach with the closed-over argument lists.
    ctx = attach_args[0]
    inst = attach_args[1]
    where = bdm['mount_device']
    if bdm.get('volume_id'):
        LOG.info(_LI('Booting with volume %(volume_id)s at '
                     '%(mountpoint)s'),
                 {'volume_id': bdm.volume_id, 'mountpoint': where},
                 context=ctx, instance=inst)
    elif bdm.get('snapshot_id'):
        LOG.info(_LI('Booting with volume snapshot %(snapshot_id)s at '
                     '%(mountpoint)s'),
                 {'snapshot_id': bdm.snapshot_id, 'mountpoint': where},
                 context=ctx, instance=inst)
    elif bdm.get('image_id'):
        LOG.info(_LI('Booting with volume-backed-image %(image_id)s at '
                     '%(mountpoint)s'),
                 {'image_id': bdm.image_id, 'mountpoint': where},
                 context=ctx, instance=inst)
    else:
        LOG.info(_LI('Booting with blank volume at %(mountpoint)s'),
                 {'mountpoint': where}, context=ctx, instance=inst)
    bdm.attach(*attach_args, **attach_kwargs)
def __exit__(self, ex_type, ex_value, ex_traceback):
    # Context-manager exit hook that translates known exception types into
    # webob Faults with appropriate HTTP codes. Returning True suppresses
    # (only reached when there was no exception); returning False lets an
    # unrecognized exception propagate. The order of the isinstance checks
    # is significant.
    if not ex_value:
        return True
    if isinstance(ex_value, exception.Forbidden):
        # Authorization failures map to HTTP 403.
        raise Fault(
            webob.exc.HTTPForbidden(explanation=ex_value.format_message()))
    elif isinstance(ex_value, exception.VersionNotFoundForAPIMethod):
        # Re-raise unchanged; handled by the API-version dispatch layer.
        raise
    elif isinstance(ex_value, exception.Invalid):
        # Invalid exceptions carry their own HTTP code; preserve it.
        raise Fault(
            exception.ConvertedException(
                code=ex_value.code,
                explanation=ex_value.format_message()))
    elif isinstance(ex_value, TypeError):
        # Log the full traceback and answer with HTTP 400.
        exc_info = (ex_type, ex_value, ex_traceback)
        LOG.error(_LE('Exception handling resource: %s'), ex_value,
                  exc_info=exc_info)
        raise Fault(webob.exc.HTTPBadRequest())
    elif isinstance(ex_value, Fault):
        # Already a Fault; just log and re-raise as-is.
        LOG.info(_LI("Fault thrown: %s"), ex_value)
        raise ex_value
    elif isinstance(ex_value, webob.exc.HTTPException):
        # Wrap raw webob HTTP exceptions in a Fault for uniform handling.
        LOG.info(_LI("HTTP exception thrown: %s"), ex_value)
        raise Fault(ex_value)

    # We didn't handle the exception
    return False
def update_available_resource(self, context):
    """Refresh this tracker's view of compute node resource usage.

    Pulls the current resource snapshot from the virt driver, normalizes
    it, and hands it to the accounting step. Tracking is disabled when
    the driver cannot report resources.
    """
    LOG.info(_LI("Auditing locally available compute resources for "
                 "node %(node)s"),
             {'node': self.nodename})

    snapshot = self.driver.get_available_resource(self.nodename)
    if not snapshot:
        # The virt driver does not support this function
        LOG.info(_LI("Virt driver does not support "
                     "'get_available_resource'. Compute tracking is "
                     "disabled."))
        self.compute_node = None
        return

    snapshot['host_ip'] = CONF.my_ip

    # We want the 'cpu_info' to be None from the POV of the
    # virt driver, but the DB requires it to be non-null so
    # just force it to empty string
    cpu_info = snapshot.get("cpu_info")
    if cpu_info is None:
        snapshot["cpu_info"] = ''

    self._verify_resources(snapshot)
    self._report_hypervisor_resource_view(snapshot)
    self._update_available_resource(context, snapshot)
def sync_instance_info(self, context, host_name, instance_uuids):
    """Receives the uuids of the instances on a host.

    This method is periodically called by the compute nodes, which send a
    list of all the UUID values for the instances on that node. This is
    used by the scheduler's HostManager to detect when its view of the
    compute node's instances is out of sync.
    """
    host_info = self._instance_info.get(host_name)
    if not host_info:
        # First contact from this host: build its instance view fresh.
        self._recreate_instance_info(context, host_name)
        LOG.info(_LI("Received a sync request from an unknown host '%s'. "
                     "Re-created its InstanceList."), host_name)
        return

    known = set(host_info["instances"].keys())
    reported = set(instance_uuids)
    if known != reported:
        # Our cached view drifted from what the compute node reports.
        self._recreate_instance_info(context, host_name)
        LOG.info(_LI("The instance sync for host '%s' did not match. "
                     "Re-created its InstanceList."), host_name)
        return

    host_info["updated"] = True
    LOG.info(_LI("Successfully synced instances from host '%s'."),
             host_name)
def _match_forced_nodes(host_map, nodes_to_force):
    """Filter ``host_map`` in place down to entries whose nodename appears
    in ``nodes_to_force`` and log the outcome.
    """
    forced_nodes = []
    for (hostname, nodename) in list(host_map.keys()):
        if nodename not in nodes_to_force:
            del host_map[(hostname, nodename)]
        else:
            forced_nodes.append(nodename)
    if host_map:
        forced_nodes_str = ', '.join(forced_nodes)
        msg = _LI('Host filter forcing available nodes to %s')
    else:
        forced_nodes_str = ', '.join(nodes_to_force)
        msg = _LI("No nodes matched due to not matching "
                  "'force_nodes' value of '%s'")
    # BUG FIX: pass the argument to the logger instead of pre-interpolating
    # with %: interpolation is deferred until the record is emitted, and a
    # literal '%' in a node name can no longer raise a formatting error.
    LOG.info(msg, forced_nodes_str)
def _match_forced_nodes(host_map, nodes_to_force):
    """Restrict ``host_map`` (mutated in place) to forced nodenames.

    Logs which nodes were forced, or that nothing matched.
    """
    forced_nodes = []
    for (hostname, nodename) in list(host_map.keys()):
        if nodename not in nodes_to_force:
            del host_map[(hostname, nodename)]
        else:
            forced_nodes.append(nodename)
    if host_map:
        forced_nodes_str = ', '.join(forced_nodes)
        msg = _LI('Host filter forcing available nodes to %s')
    else:
        forced_nodes_str = ', '.join(nodes_to_force)
        msg = _LI("No nodes matched due to not matching "
                  "'force_nodes' value of '%s'")
    # BUG FIX: defer interpolation to the logging call (lazy %-args)
    # rather than eagerly formatting with the % operator.
    LOG.info(msg, forced_nodes_str)
def migrate_instance_finish(self, context, instance_uuid,
                            floating_addresses, host=None,
                            rxtx_factor=None, project_id=None,
                            source=None, dest=None):
    """Re-associate an instance's floating IPs on the destination host
    once a migration has finished.
    """
    # We only care if floating_addresses are provided and we're
    # switching hosts
    if host and not dest:
        # Fall back to 'host' as the destination when 'dest' is not given.
        dest = host
    if not floating_addresses or (source and source == dest):
        return

    LOG.info(_LI("Finishing migration network for instance %s"),
             instance_uuid)

    for address in floating_addresses:
        floating_ip = objects.FloatingIP.get_by_address(context, address)

        if self._is_stale_floating_ip_address(context, floating_ip):
            # The address no longer belongs to this instance; skip it.
            LOG.warning(_LW("Floating IP address |%(address)s| no longer "
                            "belongs to instance %(instance_uuid)s. "
                            "Will not setup it."),
                        {'address': address,
                         'instance_uuid': instance_uuid})
            continue

        # Record the new host before re-plumbing the address.
        floating_ip.host = dest
        floating_ip.save()

        interface = CONF.public_interface or floating_ip.interface
        fixed_ip = floating_ip.fixed_ip
        self.l3driver.add_floating_ip(floating_ip.address,
                                      fixed_ip.address,
                                      interface,
                                      fixed_ip.network)
def _error(self, inner, req):
    """Convert an exception raised while serving ``req`` into a wsgi.Fault."""
    if not isinstance(inner, exception.QuotaError):
        # Log a traceback for everything except quota errors.
        LOG.exception(_LE("Caught error: %(type)s %(error)s"),
                      {'type': type(inner), 'error': inner})

    safe = getattr(inner, 'safe', False)
    headers = getattr(inner, 'headers', None)
    status = getattr(inner, 'code', 500)
    if status is None:
        status = 500

    LOG.info(_LI("%(url)s returned with HTTP %(status)d"),
             {'url': req.url, 'status': status})

    outer = self.status_to_type(status)
    if headers:
        outer.headers = headers
    # NOTE(johannes): We leave the explanation empty here on
    # purpose. It could possibly have sensitive information
    # that should not be returned back to the user. See
    # bugs 868360 and 874472
    # NOTE(eglynn): However, it would be over-conservative and
    # inconsistent with the EC2 API to hide every exception,
    # including those that are safe to expose, see bug 1021373
    if safe:
        if isinstance(inner, exception.JacketException):
            msg = inner.msg
        else:
            msg = six.text_type(inner)
        outer.explanation = _('%(exception)s: %(explanation)s') % {
            'exception': inner.__class__.__name__,
            'explanation': msg}
    return wsgi.Fault(outer)
def _preserve_multipath_id(self, connection_info):
    # Carry the multipath_id recorded on the saved connection_info over
    # to the freshly-built one so the multipath device can be found and
    # cleaned up later.
    saved = self['connection_info']
    if saved and 'data' in saved:
        if 'multipath_id' in saved['data']:
            multipath_id = saved['data']['multipath_id']
            connection_info['data']['multipath_id'] = multipath_id
            LOG.info(_LI('preserve multipath_id %s'), multipath_id)
def migrate_instance_start(self, context, instance_uuid,
                           floating_addresses,
                           rxtx_factor=None, project_id=None,
                           source=None, dest=None):
    """Unplumb an instance's floating IPs on the source host at the start
    of a migration.
    """
    # We only care if floating_addresses are provided and we're
    # switching hosts
    if not floating_addresses or (source and source == dest):
        return

    LOG.info(_LI("Starting migration network for instance %s"),
             instance_uuid)

    for address in floating_addresses:
        floating_ip = objects.FloatingIP.get_by_address(context, address)

        if self._is_stale_floating_ip_address(context, floating_ip):
            # The address no longer belongs to this instance; skip it.
            LOG.warning(_LW("Floating IP address |%(address)s| no longer "
                            "belongs to instance %(instance_uuid)s. "
                            "Will not migrate it "),
                        {'address': address,
                         'instance_uuid': instance_uuid})
            continue

        interface = CONF.public_interface or floating_ip.interface
        fixed_ip = floating_ip.fixed_ip
        self.l3driver.remove_floating_ip(floating_ip.address,
                                        fixed_ip.address,
                                        interface,
                                        fixed_ip.network)
        # NOTE(wenjianhn): Make this address will not be bound to public
        # interface when restarts compute-network on dest compute node
        floating_ip.host = None
        floating_ip.save()
def _reclaim_queued_deletes(self, context):
    """Reclaim instances that are queued for deletion."""
    interval = CONF.reclaim_instance_interval
    if interval <= 0:
        # Feature is disabled by configuration.
        LOG.debug("CONF.reclaim_instance_interval <= 0, skipping...")
        return

    # TODO(comstud, jichenjc): Dummy quota object for now See bug 1296414.
    # The only case that the quota might be inconsistent is
    # the cloud node died between set instance state to SOFT_DELETED
    # and quota commit to DB. When cloud node starts again
    # it will have no idea the reservation is committed or not or even
    # expired, since it's a rare case, so marked as todo.
    quotas = objects.Quotas.from_reservations(context, None)

    # Only consider soft-deleted instances owned by this host with no
    # task currently in flight.
    filters = {'vm_state': vm_states.SOFT_DELETED,
               'task_state': None,
               'host': self.host}
    instances = objects.InstanceList.get_by_filters(
        context, filters,
        expected_attrs=objects.instance.INSTANCE_DEFAULT_FIELDS,
        use_slave=True)
    for instance in instances:
        if self._deleted_old_enough(instance, interval):
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                context, instance.uuid)
            LOG.info(_LI('Reclaiming deleted instance'), instance=instance)
            try:
                self._delete_instance(context, instance, bdms, quotas)
            except Exception as e:
                # Best-effort: keep reclaiming the remaining instances
                # even if one deletion fails.
                LOG.warning(_LW("Periodic reclaim failed to delete "
                                "instance: %s"),
                            e, instance=instance)
def _report_state(self, service):
    """Update the state of this service in the datastore."""
    try:
        service.service_ref.report_count += 1
        service.service_ref.save()

        # TODO(termie): make this pattern be more elegant.
        if getattr(service, 'model_disconnected', False):
            service.model_disconnected = False
            LOG.info(_LI('Recovered from being unable to report status.'))
    except messaging.MessagingTimeout:
        # NOTE(johngarbutt) during upgrade we will see messaging timeouts
        # as compute-conductor is restarted, so only log this error once.
        if not getattr(service, 'model_disconnected', False):
            service.model_disconnected = True
            # FIX: LOG.warn is a deprecated alias of LOG.warning.
            LOG.warning(_LW('Lost connection to compute-conductor '
                            'for reporting service status.'))
    except Exception:
        # NOTE(rpodolyaka): we'd like to avoid catching of all possible
        # exceptions here, but otherwise it would become possible for
        # the state reporting thread to stop abruptly, and thus leave
        # the service unusable until it's restarted.
        LOG.exception(
            _LE('Unexpected error while reporting service status'))
        # trigger the recovery log message, if this error goes away
        service.model_disconnected = True
def check_attach_volume_complete(self, volume):
    """Return whether the attach operation on ``volume`` has completed."""
    LOG.info(_LI("wait volume(%s) attach complete"), volume)
    transitional_states = ['available', 'attaching']
    final_states = ['in-use']
    return self.check_opt_volume_complete("attach", volume,
                                          transitional_states,
                                          final_states)
def check_delete_snapshot_complete(self, snap_id):
    """Return True once snapshot ``snap_id`` is gone, False while it is
    still being deleted.

    :raises: exception.ResourceUnknownStatus if the snapshot lands in
        the 'error' state.
    """
    try:
        snap = self.client().volume_snapshots.get(snap_id)
    except Exception as ex:
        if not self.ignore_not_found(ex):
            raise
        # The snapshot is no longer found: deletion has completed.
        return True
    # BUG FIX: ('deleting') is just the string 'deleting', so the original
    # membership test performed a substring check (a status like 'ing'
    # would have matched). Compare for equality instead.
    if snap.status == 'deleting':
        LOG.debug("Snapshot %(id)s is being deleted - "
                  "status: %(status)s" % {'id': snap_id,
                                          'status': snap.status})
        return False
    if snap.status == 'error':
        LOG.debug("delete failed - snapshot %(snap)s is "
                  "in %(status)s status" % {"snap": snap_id,
                                            "status": snap.status})
        raise exception.ResourceUnknownStatus(
            resource_status=snap.status,
            result=_('Snapshot delete failed'))
    LOG.info(_LI('delete snapshot %(id)s complete'), {'id': snap_id})
    return True
def update_instance_info(self, context, host_name, instance_info):
    """Receives an InstanceList object from a compute node.

    This method receives information from a compute node when it starts
    up, or when its instances have changed, and updates its view of hosts
    and instances with it.
    """
    host_info = self._instance_info.get(host_name)
    if host_info:
        inst_dict = host_info.get("instances")
        for instance in instance_info.objects:
            # Overwrite the entry (if any) with the new info.
            inst_dict[instance.uuid] = instance
        host_info["updated"] = True
    else:
        instances = instance_info.objects
        if len(instances) > 1:
            # This is a host sending its full instance list, so use it.
            host_info = self._instance_info[host_name] = {}
            host_info["instances"] = {instance.uuid: instance
                                      for instance in instances}
            host_info["updated"] = True
        else:
            # NOTE(review): a 0- or 1-element update from an unknown host
            # is not trusted as a complete list and triggers a full
            # re-sync instead — confirm this threshold is intended.
            self._recreate_instance_info(context, host_name)
            LOG.info(_LI("Received an update from an unknown host '%s'. "
                         "Re-created its InstanceList."), host_name)
def _match_forced_hosts(host_map, hosts_to_force):
    """Filter ``host_map`` in place down to entries whose hostname appears
    (case-insensitively) in ``hosts_to_force`` and log the outcome.
    """
    forced_hosts = []
    lowered_hosts_to_force = [host.lower() for host in hosts_to_force]
    for (hostname, nodename) in list(host_map.keys()):
        if hostname.lower() not in lowered_hosts_to_force:
            del host_map[(hostname, nodename)]
        else:
            forced_hosts.append(hostname)
    if host_map:
        forced_hosts_str = ', '.join(forced_hosts)
        msg = _LI('Host filter forcing available hosts to %s')
    else:
        forced_hosts_str = ', '.join(hosts_to_force)
        msg = _LI("No hosts matched due to not matching "
                  "'force_hosts' value of '%s'")
    # BUG FIX: pass the argument to the logger instead of pre-interpolating
    # with %: formatting is deferred until emission, and a literal '%' in a
    # hostname can no longer raise a formatting error.
    LOG.info(msg, forced_hosts_str)
def _preserve_multipath_id(self, connection_info):
    # Copy a previously recorded multipath_id into the new connection_info
    # so multipath cleanup still works after the connector info is rebuilt.
    old_info = self['connection_info']
    if not old_info or 'data' not in old_info:
        return
    if 'multipath_id' not in old_info['data']:
        return
    connection_info['data']['multipath_id'] = \
        old_info['data']['multipath_id']
    LOG.info(_LI('preserve multipath_id %s'),
             connection_info['data']['multipath_id'])
def _report_state(self, service):
    """Update the state of this service in the datastore."""
    try:
        service.service_ref.report_count += 1
        service.service_ref.save()

        # TODO(termie): make this pattern be more elegant.
        if getattr(service, 'model_disconnected', False):
            service.model_disconnected = False
            LOG.info(
                _LI('Recovered from being unable to report status.'))
    except messaging.MessagingTimeout:
        # NOTE(johngarbutt) during upgrade we will see messaging timeouts
        # as compute-conductor is restarted, so only log this error once.
        if not getattr(service, 'model_disconnected', False):
            service.model_disconnected = True
            # FIX: use LOG.warning; LOG.warn is a deprecated alias.
            LOG.warning(_LW('Lost connection to compute-conductor '
                            'for reporting service status.'))
    except Exception:
        # NOTE(rpodolyaka): we'd like to avoid catching of all possible
        # exceptions here, but otherwise it would become possible for
        # the state reporting thread to stop abruptly, and thus leave
        # the service unusable until it's restarted.
        LOG.exception(
            _LE('Unexpected error while reporting service status'))
        # trigger the recovery log message, if this error goes away
        service.model_disconnected = True
def attach_volume(self, context, **kwargs):
    """Shadows the device and passes an unencrypted version to the
    instance.

    Transparent disk encryption is achieved by mounting the volume via
    dm-crypt and passing the resulting device to the instance. The
    instance is unaware of the underlying encryption due to modifying the
    original symbolic link to refer to the device mounted by dm-crypt.
    """
    passphrase = self._get_passphrase(
        self._get_key(context).get_encoded())

    try:
        self._open_volume(passphrase, **kwargs)
    except processutils.ProcessExecutionError as e:
        if e.exit_code != 1 or is_luks(self.dev_path):
            raise
        # the device has never been formatted; format it and try again
        LOG.info(_LI("%s is not a valid LUKS device;"
                     " formatting device for first use"), self.dev_path)
        self._format_volume(passphrase, **kwargs)
        self._open_volume(passphrase, **kwargs)

    # modify the original symbolic link to refer to the decrypted device
    utils.execute('ln', '--symbolic', '--force',
                  '/dev/mapper/%s' % self.dev_name, self.symlink_path,
                  run_as_root=True, check_exit_code=True)
def attach_volume(self, context, **kwargs):
    """Shadows the device and passes an unencrypted version to the
    instance.

    Transparent disk encryption is achieved by mounting the volume via
    dm-crypt and passing the resulting device to the instance. The
    instance is unaware of the underlying encryption due to modifying the
    original symbolic link to refer to the device mounted by dm-crypt.
    """
    # Derive the dm-crypt passphrase from the volume's encryption key.
    key = self._get_key(context).get_encoded()
    passphrase = self._get_passphrase(key)

    try:
        self._open_volume(passphrase, **kwargs)
    except processutils.ProcessExecutionError as e:
        # Exit code 1 on a non-LUKS device indicates a never-formatted
        # volume; anything else is a genuine failure.
        if e.exit_code == 1 and not is_luks(self.dev_path):
            # the device has never been formatted; format it and try again
            LOG.info(_LI("%s is not a valid LUKS device;"
                         " formatting device for first use"),
                     self.dev_path)
            self._format_volume(passphrase, **kwargs)
            self._open_volume(passphrase, **kwargs)
        else:
            raise

    # modify the original symbolic link to refer to the decrypted device
    utils.execute('ln', '--symbolic', '--force',
                  '/dev/mapper/%s' % self.dev_name, self.symlink_path,
                  run_as_root=True, check_exit_code=True)
def add_to_instance(self, context, instance, security_group_name):
    """Add security group to the instance."""
    neutron = neutronapi.get_client(context)
    try:
        # Resolve the caller-supplied name (or id) to a security group id.
        security_group_id = neutronv20.find_resourceid_by_name_or_id(
            neutron, 'security_group', security_group_name,
            context.project_id)
    except n_exc.NeutronClientNoUniqueMatch as e:
        raise exception.NoUniqueMatch(six.text_type(e))
    except n_exc.NeutronClientException as e:
        exc_info = sys.exc_info()
        if e.status_code == 404:
            msg = (_("Security group %(name)s is not found for "
                     "project %(project)s") %
                   {'name': security_group_name,
                    'project': context.project_id})
            self.raise_not_found(msg)
        else:
            LOG.exception(_LE("Neutron Error:"))
            six.reraise(*exc_info)
    # Find all ports attached to this instance.
    params = {'device_id': instance.uuid}
    try:
        ports = neutron.list_ports(**params).get('ports')
    except n_exc.NeutronClientException:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Neutron Error:"))
    if not ports:
        msg = (_("instance_id %s could not be found as device id on"
                 " any ports") % instance.uuid)
        self.raise_not_found(msg)
    for port in ports:
        if not self._has_security_group_requirements(port):
            # Refuse to apply a security group to a port that cannot
            # enforce it.
            LOG.warning(_LW("Cannot add security group %(name)s to "
                            "%(instance)s since the port %(port_id)s "
                            "does not meet security requirements"),
                        {'name': security_group_name,
                         'instance': instance.uuid,
                         'port_id': port['id']})
            raise exception.SecurityGroupCannotBeApplied()
        if 'security_groups' not in port:
            port['security_groups'] = []
        port['security_groups'].append(security_group_id)
        updated_port = {'security_groups': port['security_groups']}
        try:
            LOG.info(_LI("Adding security group %(security_group_id)s to "
                         "port %(port_id)s"),
                     {'security_group_id': security_group_id,
                      'port_id': port['id']})
            neutron.update_port(port['id'], {'port': updated_port})
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.exception(_LE("Neutron Error:"))
def _query_driver_power_state_and_sync(self, context, db_instance,
                                       vm_power_state):
    """Sync the DB record for ``db_instance`` with the supplied hypervisor
    power state, unless a task is currently in flight for the instance.
    """
    if db_instance.task_state is not None:
        # A pending task will set the right state when it completes;
        # syncing now could fight with it.
        LOG.info(_LI("During sync_power_state the instance has a "
                     "pending task (%(task)s). Skip."),
                 {'task': db_instance.task_state},
                 instance=db_instance)
        return
    # No pending tasks. Now try to figure out the real vm_power_state.
    # NOTE: the driver query below was replaced by the vm_power_state
    # parameter; kept for reference.
    # try:
    #     vm_instance = self.driver.get_info(db_instance)
    #     vm_power_state = vm_instance.state
    # except exception.InstanceNotFound:
    #     vm_power_state = power_state.NOSTATE
    # Note(maoy): the above get_info call might take a long time,
    # for example, because of a broken libvirt driver.
    try:
        self._sync_instance_power_state(context,
                                        db_instance,
                                        vm_power_state,
                                        use_slave=True)
    except exception.InstanceNotFound:
        # NOTE(hanlind): If the instance gets deleted during sync,
        # silently ignore.
        pass
def reboot(self, context, instance, network_info, reboot_type,
           block_device_info=None, bad_volumes_callback=None):
    """Reboot the specified instance.

    NOTE: Ironic does not support soft-off, so this method
    always performs a hard-reboot.
    NOTE: Unlike the libvirt driver, this method does not delete
    and recreate the instance; it preserves local state.

    :param context: The security context.
    :param instance: The instance object.
    :param network_info: Instance network information. Ignored by
        this driver.
    :param reboot_type: Either a HARD or SOFT reboot. Ignored by
        this driver.
    :param block_device_info: Info pertaining to attached volumes.
        Ignored by this driver.
    :param bad_volumes_callback: Function to handle any bad volumes
        encountered. Ignored by this driver.
    """
    LOG.debug('Reboot called for instance', instance=instance)
    node = self._validate_instance_and_node(instance)
    self.ironicclient.call("node.set_power_state", node.uuid, 'reboot')

    # Poll until the node reports the requested power state.
    poller = loopingcall.FixedIntervalLoopingCall(
        self._wait_for_power_state, instance, 'reboot')
    poller.start(interval=CONF.ironic.api_retry_interval).wait()
    LOG.info(_LI('Successfully rebooted Ironic node %s'),
             node.uuid, instance=instance)
def destroy(self, context, instance, network_info,
            block_device_info=None, destroy_disks=True, migrate_data=None):
    """Destroy the specified instance, if it can be found.

    :param context: The security context.
    :param instance: The instance object.
    :param network_info: Instance network information.
    :param block_device_info: Instance block device
        information. Ignored by this driver.
    :param destroy_disks: Indicates if disks should be
        destroyed. Ignored by this driver.
    :param migrate_data: implementation specific params.
        Ignored by this driver.
    """
    LOG.debug('Destroy called for instance', instance=instance)
    try:
        node = self._validate_instance_and_node(instance)
    except exception.InstanceNotFound:
        LOG.warning(_LW("Destroy called on non-existing instance %s."),
                    instance.uuid)
        # NOTE(deva): if compute.compute.ComputeManager._delete_instance()
        #             is called on a non-existing instance, the only way
        #             to delete it is to return from this method
        #             without raising any exceptions.
        return

    # Only unprovision nodes that are in a state where unprovisioning
    # is meaningful.
    if node.provision_state in _UNPROVISION_STATES:
        self._unprovision(instance, node)

    self._cleanup_deploy(node, instance, network_info)
    LOG.info(_LI('Successfully unprovisioned Ironic node %s'),
             node.uuid, instance=instance)
def _error(self, inner, req):
    """Log ``inner``, emit an API fault notification, and wrap the error
    in a wsgi.Fault for returning to the caller.
    """
    LOG.exception(_LE("Caught error: %s"), six.text_type(inner))

    safe = getattr(inner, 'safe', False)
    headers = getattr(inner, 'headers', None)
    status = getattr(inner, 'code', 500)
    if status is None:
        status = 500

    LOG.info(_LI("%(url)s returned with HTTP %(status)d"),
             {'url': req.url, 'status': status})

    outer = self.status_to_type(status)
    if headers:
        outer.headers = headers
    # NOTE(johannes): We leave the explanation empty here on
    # purpose. It could possibly have sensitive information
    # that should not be returned back to the user. See
    # bugs 868360 and 874472
    # NOTE(eglynn): However, it would be over-conservative and
    # inconsistent with the EC2 API to hide every exception,
    # including those that are safe to expose, see bug 1021373
    if safe:
        user_locale = req.best_match_language()
        inner_msg = translate(inner.message, user_locale)
        outer.explanation = '%s: %s' % (inner.__class__.__name__,
                                        inner_msg)

    notifications.send_api_fault(req.url, status, inner)
    return wsgi.Fault(outer)
def unfilter_instance(self, instance, network_info):
    # Pop the cached entry; a hit means this instance had filters applied.
    info = self.instance_info.pop(instance.id, None)
    if not info:
        LOG.info(_LI('Attempted to unfilter instance which is not '
                     'filtered'), instance=instance)
        return
    self.remove_filters_for_instance(instance)
    self.iptables.apply()
def check_attach_volume_complete(self, volume):
    """Poll helper: report whether ``volume`` has reached 'in-use'."""
    LOG.info(_LI("wait volume(%s) attach complete"), volume)
    return self.check_opt_volume_complete(
        "attach", volume, ['available', 'attaching'], ['in-use'])
def remove_from_instance(self, context, instance, security_group_name):
    """Remove the security group associated with the instance."""
    neutron = neutronapi.get_client(context)
    try:
        # Resolve the caller-supplied name (or id) to a security group id.
        security_group_id = neutronv20.find_resourceid_by_name_or_id(
            neutron, 'security_group', security_group_name,
            context.project_id)
    except n_exc.NeutronClientException as e:
        exc_info = sys.exc_info()
        if e.status_code == 404:
            msg = (_("Security group %(name)s is not found for "
                     "project %(project)s") %
                   {'name': security_group_name,
                    'project': context.project_id})
            self.raise_not_found(msg)
        else:
            LOG.exception(_LE("Neutron Error:"))
            six.reraise(*exc_info)
    # Find all ports attached to this instance.
    params = {'device_id': instance.uuid}
    try:
        ports = neutron.list_ports(**params).get('ports')
    except n_exc.NeutronClientException:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Neutron Error:"))
    if not ports:
        msg = (_("instance_id %s could not be found as device id on"
                 " any ports") % instance.uuid)
        self.raise_not_found(msg)
    found_security_group = False
    for port in ports:
        try:
            port.get('security_groups', []).remove(security_group_id)
        except ValueError:
            # When removing a security group from an instance the security
            # group should be on both ports since it was added this way if
            # done through the compute api. In case it is not a 404 is only
            # raised if the security group is not found on any of the
            # ports on the instance.
            continue
        updated_port = {'security_groups': port['security_groups']}
        try:
            # BUG FIX: this log line previously said "Adding security
            # group ... to port" although this is the removal path.
            LOG.info(_LI("Removing security group %(security_group_id)s "
                         "from port %(port_id)s"),
                     {'security_group_id': security_group_id,
                      'port_id': port['id']})
            neutron.update_port(port['id'], {'port': updated_port})
            found_security_group = True
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.exception(_LE("Neutron Error:"))
    if not found_security_group:
        msg = (_("Security group %(security_group_name)s not associated "
                 "with the instance %(instance)s") %
               {'security_group_name': security_group_name,
                'instance': instance.uuid})
        self.raise_not_found(msg)
def create(self, req, server_id, body):
    """Attach an interface to an instance."""
    context = req.environ['compute.context']
    authorize(context)

    network_id = None
    port_id = None
    req_ip = None
    if body:
        attachment = body['interfaceAttachment']
        network_id = attachment.get('net_id', None)
        port_id = attachment.get('port_id', None)
        try:
            req_ip = attachment['fixed_ips'][0]['ip_address']
        except Exception:
            # fixed_ips is optional; fall through with req_ip = None.
            pass

    # Validate the combination of inputs before touching the compute API.
    if network_id and port_id:
        msg = _("Must not input both network_id and port_id")
        raise exc.HTTPBadRequest(explanation=msg)
    if req_ip and not network_id:
        msg = _("Must input network_id when request IP address")
        raise exc.HTTPBadRequest(explanation=msg)

    if req_ip:
        try:
            # Syntax check only; rejects malformed IP addresses early.
            netaddr.IPAddress(req_ip)
        except netaddr.AddrFormatError as e:
            raise exc.HTTPBadRequest(explanation=six.text_type(e))

    try:
        instance = common.get_instance(self.compute_api,
                                       context, server_id)
        LOG.info(_LI("Attach interface"), instance=instance)
        vif = self.compute_api.attach_interface(context,
            instance, network_id, port_id, req_ip)
    # Map compute/network errors onto the appropriate HTTP responses.
    except (exception.PortNotFound,
            exception.NetworkNotFound) as e:
        raise exc.HTTPNotFound(explanation=e.format_message())
    except (exception.FixedIpAlreadyInUse,
            exception.InterfaceAttachFailedNoNetwork,
            exception.NoMoreFixedIps,
            exception.PortInUse,
            exception.NetworkDuplicated,
            exception.NetworkAmbiguous,
            exception.PortNotUsable) as e:
        raise exc.HTTPBadRequest(explanation=e.format_message())
    except exception.InstanceIsLocked as e:
        raise exc.HTTPConflict(explanation=e.format_message())
    except NotImplementedError:
        msg = _("Network driver does not support this function.")
        raise webob.exc.HTTPNotImplemented(explanation=msg)
    except exception.InterfaceAttachFailed:
        msg = _("Failed to attach interface")
        raise webob.exc.HTTPInternalServerError(explanation=msg)
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(state_error,
            'attach_interface', server_id)

    return self.show(req, server_id, vif['id'])
def _error(self, inner, req):
    """Translate an internal exception into a WSGI Fault response.

    Quota errors are expected operational conditions, so only
    non-quota exceptions are logged with a traceback. The returned
    Fault deliberately omits the exception explanation unless the
    exception marks itself 'safe' (see bugs 868360, 874472, 1021373).
    """
    if not isinstance(inner, exception.QuotaError):
        LOG.exception(_LE("Caught error: %(type)s %(error)s"),
                      {'type': type(inner), 'error': inner})

    is_safe = getattr(inner, 'safe', False)
    extra_headers = getattr(inner, 'headers', None)
    http_status = getattr(inner, 'code', 500)
    if http_status is None:
        http_status = 500

    LOG.info(_LI("%(url)s returned with HTTP %(status)d"),
             dict(url=req.url, status=http_status))

    outer = self.status_to_type(http_status)
    if extra_headers:
        outer.headers = extra_headers

    # NOTE(johannes): We leave the explanation empty here on
    # purpose. It could possibly have sensitive information
    # that should not be returned back to the user. See
    # bugs 868360 and 874472
    # NOTE(eglynn): However, it would be over-conservative and
    # inconsistent with the EC2 API to hide every exception,
    # including those that are safe to expose, see bug 1021373
    if is_safe:
        if isinstance(inner, exception.JacketException):
            explanation = inner.msg
        else:
            explanation = six.text_type(inner)
        outer.explanation = _('%(exception)s: %(explanation)s') % {
            'exception': inner.__class__.__name__,
            'explanation': explanation,
        }

    return wsgi.Fault(outer)
def update_instance_info(self, context, host_name, instance_info):
    """Receives an InstanceList object from a compute node.

    This method receives information from a compute node when it
    starts up, or when its instances have changed, and updates its
    view of hosts and instances with it.
    """
    entry = self._instance_info.get(host_name)
    if entry:
        # Known host: merge the incoming instances into the cache.
        cached = entry.get("instances")
        for inst in instance_info.objects:
            # Overwrite the entry (if any) with the new info.
            cached[inst.uuid] = inst
        entry["updated"] = True
        return

    incoming = instance_info.objects
    if len(incoming) > 1:
        # This is a host sending its full instance list, so use it.
        entry = self._instance_info[host_name] = {}
        entry["instances"] = {inst.uuid: inst for inst in incoming}
        entry["updated"] = True
    else:
        # A partial update from a host we've never seen: rebuild its
        # instance list from scratch instead of trusting the fragment.
        self._recreate_instance_info(context, host_name)
        LOG.info(_LI("Received an update from an unknown host '%s'. "
                     "Re-created its InstanceList."), host_name)
def check_extend_volume_complete(self, volume):
    """Wait for an in-progress volume extend operation to finish."""
    LOG.info(_LI("wait volume(%s) extend complete"), volume)
    in_progress = ['extending']
    success = ['available']
    failure = ['error_extending']
    return self.check_opt_volume_complete("extend", volume,
                                          in_progress, success,
                                          failure)
def create(self, req, body):
    """Creates a new snapshot.

    :param req: the API request; the request context is read from its
        WSGI environ
    :param body: request body; must contain a 'snapshot' dict with a
        'volume_id' key and optional 'force', 'display_name' and
        'display_description' keys
    :returns: dict holding the translated view of the new snapshot
    :raises HTTPBadRequest: when the body is invalid, 'volume_id' is
        missing, or 'force' is not a valid boolean string
    """
    context = req.environ['compute.context']
    authorize(context)

    if not self.is_valid_body(body, 'snapshot'):
        msg = _("snapshot not specified")
        raise exc.HTTPBadRequest(explanation=msg)

    snapshot = body['snapshot']
    volume_id = snapshot.get('volume_id')
    if volume_id is None:
        # A missing volume_id previously surfaced as a KeyError and an
        # HTTP 500; report it as a client error instead.
        msg = _("volume_id must be specified.")
        raise exc.HTTPBadRequest(explanation=msg)

    LOG.info(_LI("Create snapshot from volume %s"), volume_id,
             context=context)

    force = snapshot.get('force', False)
    try:
        force = strutils.bool_from_string(force, strict=True)
    except ValueError:
        msg = _("Invalid value '%s' for force.") % force
        raise exc.HTTPBadRequest(explanation=msg)

    # 'force' selects the snapshot API that allows snapshotting an
    # attached (in-use) volume.
    if force:
        create_func = self.volume_api.create_snapshot_force
    else:
        create_func = self.volume_api.create_snapshot

    new_snapshot = create_func(context, volume_id,
                               snapshot.get('display_name'),
                               snapshot.get('display_description'))

    retval = _translate_snapshot_detail_view(context, new_snapshot)
    return {'snapshot': retval}
def _match_forced_hosts(host_map, hosts_to_force):
    """Prune host_map in place, keeping only explicitly forced hosts.

    :param host_map: dict keyed by (hostname, nodename) tuples; entries
        whose hostname is not in hosts_to_force are deleted
    :param hosts_to_force: iterable of hostnames the user forced
        (matched case-insensitively)
    """
    forced_hosts = []
    # Use a set for O(1) membership checks instead of scanning a list
    # once per host_map entry.
    lowered_hosts_to_force = {host.lower() for host in hosts_to_force}
    # list() the keys so we can delete from host_map while iterating.
    for (hostname, nodename) in list(host_map.keys()):
        if hostname.lower() not in lowered_hosts_to_force:
            del host_map[(hostname, nodename)]
        else:
            forced_hosts.append(hostname)
    if host_map:
        forced_hosts_str = ', '.join(forced_hosts)
        msg = _LI('Host filter forcing available hosts to %s')
    else:
        forced_hosts_str = ', '.join(hosts_to_force)
        msg = _LI("No hosts matched due to not matching "
                  "'force_hosts' value of '%s'")
    # Pass the argument to the logger instead of interpolating eagerly
    # (msg % forced_hosts_str): this follows the oslo.log lazy-logging
    # guideline and avoids a crash if a hostname ever contains a '%'
    # formatting sequence.
    LOG.info(msg, forced_hosts_str)
def host_passes(self, host_state, spec_obj):
    """Skip nodes that have already been attempted."""
    retry = spec_obj.retry
    if not retry:
        # Re-scheduling is disabled
        LOG.debug("Re-scheduling is disabled")
        return True

    # TODO(sbauza): Once the HostState is actually a ComputeNode we
    # could compare objects directly instead of primitiving into lists.
    candidate = [host_state.host, host_state.nodename]
    attempted = [[cn.host, cn.hypervisor_hostname] for cn in retry.hosts]

    if candidate in attempted:
        LOG.info(_LI("Host %(host)s fails. Previously tried hosts: "
                     "%(hosts)s"),
                 {'host': candidate, 'hosts': attempted})
        return False

    # Host passes if it's not in the list of previously attempted hosts.
    return True
def _determine_version_cap(self, target):
    """Pick the compute RPC version cap from the minimum service version.

    The result is cached in the module-level LAST_VERSION so the DB
    lookup happens only once per process.

    :param target: messaging Target whose version is the fallback cap
    :returns: the compute RPC version string to pin to
    :raises exception.ServiceTooOld: when the cluster's minimum
        nova-compute service version predates what this code supports
    """
    global LAST_VERSION
    if LAST_VERSION:
        # Cached from a previous call; skip the DB round trip.
        return LAST_VERSION
    service_version = objects.Service.get_minimum_version(
        context.get_admin_context(), 'nova-compute')
    # SERVICE_VERSION_HISTORY is a sequence indexed by service version;
    # each entry maps capability names (e.g. 'compute_rpc') to versions.
    history = service_obj.SERVICE_VERSION_HISTORY
    try:
        version_cap = history[service_version]['compute_rpc']
    except IndexError:
        # The cluster minimum is newer than anything this (older)
        # binary knows about: refuse to run.
        LOG.error(_LE('Failed to extract compute RPC version from '
                      'service history because I am too '
                      'old (minimum version is now %(version)i)'),
                  {'version': service_version})
        raise exception.ServiceTooOld(
            thisver=service_obj.SERVICE_VERSION,
            minver=service_version)
    except KeyError:
        # History entry exists but has no 'compute_rpc' key; fall back
        # to the target's own version rather than failing hard.
        LOG.error(_LE('Failed to extract compute RPC version from '
                      'service history for version %(version)i'),
                  {'version': service_version})
        return target.version
    LAST_VERSION = version_cap
    LOG.info(_LI('Automatically selected compute RPC version %(rpc)s '
                 'from minimum service version %(service)i'),
             {'rpc': version_cap,
              'service': service_version})
    return version_cap
def filter_all(self, cells, filter_properties):
    """Override filter_all() which operates on the full list of cells.

    Honors a 'target_cell' scheduler hint by routing the build request
    directly to the named cell. Returns the unmodified cell list when
    the hint is absent or the caller is not authorized; returns this
    cell's own state when it *is* the target. When the request is
    forwarded to another cell, this implicitly returns None, which
    tells the cell scheduler that no further local scheduling is
    needed.
    """
    scheduler_hints = filter_properties.get('scheduler_hints')
    if not scheduler_hints:
        return cells

    # This filter only makes sense at the top level, as a full
    # cell name is specified. So we pop 'target_cell' out of the
    # hints dict.
    cell_name = scheduler_hints.pop('target_cell', None)
    if not cell_name:
        return cells

    # This authorization is after popping off target_cell, so
    # that in case this fails, 'target_cell' is not left in the
    # dict when child cells go to schedule.
    if not self.authorized(filter_properties['context']):
        # No filtering, if not authorized.
        return cells

    LOG.info(
        _LI("Forcing direct route to %(cell_name)s because "
            "of 'target_cell' scheduler hint"),
        {'cell_name': cell_name})

    scheduler = filter_properties['scheduler']
    if cell_name == filter_properties['routing_path']:
        # We *are* the target cell: schedule locally.
        return [scheduler.state_manager.get_my_state()]
    # Forward the build to the target cell; the implicit None return
    # signals that scheduling is handled elsewhere.
    ctxt = filter_properties['context']

    scheduler.msg_runner.build_instances(
        ctxt, cell_name, filter_properties['host_sched_kwargs'])
def check_upload_image_volume_complete(self, volume):
    """Wait for an in-progress volume upload-to-image to finish."""
    LOG.info(_LI("wait volume(%s) upload image complete"), volume)
    in_progress = ['uploading']
    success = []
    failure = ["error"]
    return self.check_opt_volume_complete("upload_image", volume,
                                          in_progress, success,
                                          failure)
def check_extend_volume_complete(self, volume):
    """Block until a volume extend operation has completed."""
    LOG.info(_LI("wait volume(%s) extend complete"), volume)
    return self.check_opt_volume_complete(
        "extend", volume,
        ['extending'],          # still in progress
        ['available'],          # terminal success
        ['error_extending'])    # terminal failure
def start(self):
    """Start this service: init the manager, register in the DB,
    bring up the RPC server, join the servicegroup, and kick off
    periodic tasks.
    """
    verstr = version.version_string_with_package()
    LOG.info(_LI('Starting %(topic)s node (version %(version)s)'),
             {'topic': self.topic, 'version': verstr})
    self.basic_config_check()
    self.manager.init_host()
    self.model_disconnected = False
    ctxt = context.get_admin_context()
    # Ensure a Service DB record exists for this host/binary pair.
    self.service_ref = objects.Service.get_by_host_and_binary(
        ctxt, self.host, self.binary)
    if not self.service_ref:
        try:
            self.service_ref = _create_service_ref(self, ctxt)
        except (exception.ServiceTopicExists,
                exception.ServiceBinaryExists):
            # NOTE(danms): If we race to create a record with a sibling
            # worker, don't fail here.
            self.service_ref = objects.Service.get_by_host_and_binary(
                ctxt, self.host, self.binary)

    self.manager.pre_start_hook()

    if self.backdoor_port is not None:
        self.manager.backdoor_port = self.backdoor_port

    LOG.debug("Creating RPC server for service %s", self.topic)

    target = messaging.Target(topic=self.topic, server=self.host)

    # The manager itself plus the base RPC API (ping, backdoor, ...)
    # are exposed as RPC endpoints.
    endpoints = [
        self.manager,
        baserpc.BaseRPCAPI(self.manager.service_name,
                           self.backdoor_port)
    ]
    endpoints.extend(self.manager.additional_endpoints)

    # serializer = objects_base.NovaObjectSerializer()
    serializer = objects_base.JacketObjectSerializer()

    self.rpcserver = rpc.get_server(target, endpoints, serializer)
    self.rpcserver.start()

    self.manager.post_start_hook()

    LOG.debug("Join ServiceGroup membership for this service %s",
              self.topic)
    # Add service to the ServiceGroup membership group.
    self.servicegroup_api.join(self.host, self.topic, self)

    if self.periodic_enable:
        if self.periodic_fuzzy_delay:
            # Randomize the first run so sibling workers don't fire
            # their periodic tasks in lockstep.
            initial_delay = random.randint(0, self.periodic_fuzzy_delay)
        else:
            initial_delay = None

        self.tg.add_dynamic_timer(self.periodic_tasks,
                                  initial_delay=initial_delay,
                                  periodic_interval_max=
                                  self.periodic_interval_max)
def update_available_resource(self, context):
    """See driver.get_available_resource()

    Periodic process that keeps that the compute host's understanding
    of resource availability and usage in sync with the underlying
    hypervisor.

    :param context: security context
    """
    new_resource_tracker_dict = {}

    compute_nodes_in_db = self._get_compute_nodes_in_db(context,
                                                        use_slave=True)
    nodenames = set(self.driver.get_available_nodes())
    for nodename in nodenames:
        rt = self._get_resource_tracker(nodename)
        try:
            rt.update_available_resource(context)
        except exception.ComputeHostNotFound:
            # NOTE(comstud): We can get to this case if a node was
            # marked 'deleted' in the DB and then re-added with a
            # different auto-increment id. The cached resource
            # tracker tried to update a deleted record and failed.
            # Don't add this resource tracker to the new dict, so
            # that this will resolve itself on the next run.
            LOG.info(
                _LI("Compute node '%s' not found in "
                    "update_available_resource."), nodename)
            continue
        except Exception:
            # Log-and-continue: one failing node must not block the
            # audit of the others, and the tracker is still cached.
            LOG.exception(
                _LE("Error updating resources for node "
                    "%(node)s."), {'node': nodename})
        new_resource_tracker_dict[nodename] = rt

    # NOTE(comstud): Replace the RT cache before looping through
    # compute nodes to delete below, as we can end up doing greenthread
    # switches there. Best to have everyone using the newest cache
    # ASAP.
    self._resource_tracker_dict = new_resource_tracker_dict

    # Delete orphan compute node not reported by driver but still in db
    for cn in compute_nodes_in_db:
        if cn.hypervisor_hostname not in nodenames:
            LOG.info(_LI("Deleting orphan compute node %s"), cn.id)
            cn.destroy()
def get_wsgi_server():
    """Build the WSGI server that fronts the XCP VNC proxy."""
    LOG.info(_LI("Starting compute-xvpvncproxy node (version %s)"),
             version.version_string_with_package())
    proxy_app = XCPVNCProxy()
    return compute.Server("XCP VNC Proxy",
                          proxy_app,
                          protocol=SafeHttpProtocol,
                          host=CONF.vnc.xvpvncproxy_host,
                          port=CONF.vnc.xvpvncproxy_port)
def check_delete_volume_complete(self, volume):
    """Wait for a volume delete operation to finish.

    A volume that disappears entirely is treated as successfully
    deleted (is_ignore_not_found=True).

    :param volume: the volume being deleted
    :returns: whatever check_opt_volume_complete returns for the
        "delete" operation
    """
    LOG.info(_LI("wait volume(%s) delete complete"), volume)
    by_status = ['deleting']
    expect_status = []
    not_expect_status = ['error']
    # Bug fix: the operation name passed here was "create", but this is
    # the delete path; use "delete" to match the sibling helpers
    # (check_extend_volume_complete passes "extend",
    # check_upload_image_volume_complete passes "upload_image").
    return self.check_opt_volume_complete("delete", volume, by_status,
                                          expect_status,
                                          not_expect_status,
                                          is_ignore_not_found=True)
def _strip_ignore_hosts(host_map, hosts_to_ignore):
    """Delete every host_map entry whose hostname is in hosts_to_ignore
    (case-insensitive) and log which hosts were removed.
    """
    ignored_hosts = []
    for ignore in hosts_to_ignore:
        wanted = ignore.lower()
        # Snapshot the keys so entries can be deleted mid-iteration.
        for key in list(host_map.keys()):
            hostname, nodename = key
            if hostname.lower() == wanted:
                del host_map[key]
                ignored_hosts.append(ignore)
    LOG.info(_LI('Host filter ignoring hosts: %s'),
             ', '.join(ignored_hosts))
def create(self, req, body):
    """Creates a new volume."""
    context = req.environ['compute.context']
    authorize(context)

    if not self.is_valid_body(body, 'volume'):
        msg = _("volume not specified")
        raise exc.HTTPBadRequest(explanation=msg)

    vol = body['volume']

    # Resolve the source snapshot (if any) up front; its size is the
    # fallback when the request omits an explicit size.
    snapshot = None
    snapshot_id = vol.get('snapshot_id')
    if snapshot_id is not None:
        try:
            snapshot = self.volume_api.get_snapshot(context, snapshot_id)
        except exception.SnapshotNotFound as e:
            raise exc.HTTPNotFound(explanation=e.format_message())

    size = vol.get('size', None)
    if size is None and snapshot is not None:
        size = snapshot['volume_size']
    LOG.info(_LI("Create volume of %s GB"), size, context=context)

    try:
        new_volume = self.volume_api.create(
            context,
            size,
            vol.get('display_name'),
            vol.get('display_description'),
            snapshot=snapshot,
            volume_type=vol.get('volume_type', None),
            metadata=vol.get('metadata', None),
            availability_zone=vol.get('availability_zone', None))
    except exception.InvalidInput as err:
        raise exc.HTTPBadRequest(explanation=err.format_message())
    except exception.OverQuota as err:
        raise exc.HTTPForbidden(explanation=err.format_message())

    # TODO(vish): Instance should be None at db layer instead of
    #             trying to lazy load, but for now we turn it into
    #             a dict to avoid an error.
    retval = _translate_volume_detail_view(context, dict(new_volume))
    result = {'volume': retval}

    location = '%s/%s' % (req.url, new_volume['id'])

    return wsgi.ResponseObject(result, headers=dict(location=location))
def upgrade(migrate_engine):
    """Create the INDEX_NAME index on INDEX_COLUMNS, unless an
    equivalent index is already present on the table.
    """
    meta, table, index = _get_table_index(migrate_engine)
    if index:
        # Idempotency guard: re-running the migration is a no-op.
        LOG.info(_LI('Skipped adding %s because an equivalent index'
                     ' already exists.'), INDEX_NAME)
        return
    index_columns = (getattr(table.c, col_name)
                     for col_name in INDEX_COLUMNS)
    Index(INDEX_NAME, *index_columns).create(migrate_engine)
def check_upload_image_volume_complete(self, volume):
    """Block until a volume upload-to-image operation has completed."""
    LOG.info(_LI("wait volume(%s) upload image complete"), volume)
    return self.check_opt_volume_complete(
        "upload_image", volume,
        ['uploading'],   # still in progress
        [],              # no explicit success status
        ["error"])       # terminal failure
def _strip_ignore_hosts(host_map, hosts_to_ignore):
    """Remove ignored hosts from host_map in place.

    Hostnames are compared case-insensitively; every removed host is
    reported in a single log line.
    """
    removed = []
    for host in hosts_to_ignore:
        target = host.lower()
        # Iterate over a copy of the keys since we delete while looping.
        for entry in list(host_map.keys()):
            if entry[0].lower() == target:
                del host_map[entry]
                removed.append(host)
    LOG.info(_LI('Host filter ignoring hosts: %s'), ', '.join(removed))