def get_instances(self, filters):
    """Return the dedicated-CPU instances that nova reports for this host.

    :param filters: nova ``search_opts`` mapping; it is copied, and the
                    copy is restricted to this agent's hostname
    :return: set of instance objects whose flavor extra spec has
             ``hw:cpu_policy == 'dedicated'``; may be empty or partially
             populated when nova or libvirt access fails (the failure is
             logged, not raised)
    """
    instances = set()
    try:
        nova = self.get_nova()
        # Work on a copy so the caller's dict is not modified.
        search_opts = dict(filters)
        search_opts['host'] = self._hostname
        servers = nova.servers.list(detailed=True, search_opts=search_opts)
        # Index flavors once instead of rescanning the list per server.
        flavors_by_id = dict((flavor.id, flavor)
                             for flavor in nova.flavors.list())
        for server in servers:
            flavor = flavors_by_id.get(server.flavor["id"])
            if flavor is None:
                continue
            extra_spec = flavor.get_keys()
            if extra_spec.get('hw:cpu_policy') == 'dedicated':
                instances.add(instance.instance(server.id, server.name,
                                                extra_spec))

        # get numa topology and pci info from libvirt
        for inst in instances:
            domain = guest.get_guest_domain_by_uuid(self._conn, inst.uuid)
            # The lookup yields None when the domain cannot be found; skip
            # that instance instead of aborting the remaining ones.
            if domain:
                inst.update(domain)
    except Exception as e:
        LOG.warning("Failed to get instances info! error=%s" % e)

    return instances
def get_inst(instance_uuid, callback):
    """Fetch one instance from nova and pass it to ``callback``.

    :param instance_uuid: uuid of the instance to look up
    :param callback: callable invoked with the instance; it is not called
                     when the lookup returns None
    """
    # get instance info from nova
    found = nova_provider.get_nova_client().get_instance(instance_uuid)
    if found is None:
        return
    LOG.debug("inst:%s" % found)
    callback(found)
def start_rabbitmq_client():
    """Start Rabbitmq client to listen instance notifications from Nova.

    Builds the transport URL from ``CONF.amqp``, picks the payload decoder
    that matches the configured topic, and runs the notification listener
    in a separate thread via ``rpc_work``.

    :return: the oslo.messaging notification listener that was created
    """
    cfg = CONF.amqp
    rabbit_url = "rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'],
                                              cfg['password'],
                                              cfg['host'],
                                              cfg['port'],
                                              cfg['virt_host'])
    topic = cfg['topic']
    # Log the endpoint with the password redacted; the full URL would put
    # the broker credentials into the log file.
    LOG.info("rabbit://%s:***@%s:%s/%s" % (cfg['user_id'], cfg['host'],
                                           cfg['port'], cfg['virt_host']))

    target = oslo_messaging.Target(exchange="nova", topic=topic,
                                   server="info", version="2.1",
                                   fanout=True)
    transport = oslo_messaging.get_notification_transport(CONF,
                                                          url=rabbit_url)

    if topic == 'versioned_notifications':
        payload_decoder = VersionedPayloadDecoder()
    else:
        payload_decoder = UnversionedPayloadDecoder()

    endpoints = [
        InstanceOnlineNotificationEndpoint(payload_decoder),
        InstanceOfflineNotificationEndpoint(payload_decoder),
    ]

    server = oslo_messaging.get_notification_listener(transport,
                                                      [target],
                                                      endpoints,
                                                      "threading",
                                                      allow_requeue=True)
    thread = threading.Thread(target=rpc_work, args=(server, ))
    thread.start()
    LOG.info("Rabbitmq Client Started!")

    return server
def set_irq_affinity(set_bitmap, irqs, cpulist):
    """Set irq affinity to the specified cpulist for list of irqs.

    Failures are logged per irq and never raised; an irq is reported as
    pinned only after its affinity file has been written and closed.

    :param set_bitmap: True: set bitmap file, False: set list file
    :param irqs: irq list
    :param cpulist: cpu list
    :return: set of irqs whose affinity file was written successfully
    """
    filename = 'smp_affinity' if set_bitmap else 'smp_affinity_list'
    pinned = set()

    for irq in irqs:
        irq_aff_path = "%s/%s/%s" % (COMPUTE_IRQ, irq, filename)
        try:
            with open(irq_aff_path, 'w') as f:
                f.write(cpulist)
            # Record success only once the file is closed: buffered data is
            # flushed on close, so a rejected write surfaces before here.
            LOG.info("PCI IRQ %s pinned to CPUS: %s" % (irq, cpulist))
            pinned.add(irq)
        except Exception as e:
            LOG.warning("Failed to write pci affine file:%(F)s, irq:%(I)s, "
                        "error=%(E)s" % {"F": filename, "I": irq, "E": e})
    return pinned
def process_signal_handler(signum, frame):
    """Process Signal Handler

    Clears the module-level ``stay_on`` flag for SIGTERM, SIGINT and
    SIGTSTP; any other signal is only logged.

    :param signum: number of the received signal
    :param frame: current stack frame (unused)
    """
    global stay_on

    if signum in [signal.SIGTERM, signal.SIGINT, signal.SIGTSTP]:
        stay_on = False
    else:
        # The format string needs a placeholder; without one the "%"
        # operator raises TypeError inside the handler.
        LOG.info("Ignoring signal %s" % signum)
def get_nova(self):
    """Create a nova API client (version '2.1') on a fresh session.

    :return: the nova client
    :raises Exception: "could not connect nova!" when the session or the
                       client cannot be created; the underlying error is
                       logged first
    """
    try:
        sess = session.Session(auth=self._auth, verify=self._cacert)
        return client.Client('2.1', session=sess)
    except Exception as e:
        # Keep the root cause in the log; the re-raised message is generic.
        LOG.warning("Failed to connect to nova! error=%s" % e)
        raise Exception("could not connect nova!")
def get_guest_domain_by_uuid(conn, uuid):
    """Look up a guest domain by uuid and return its parsed info.

    :param conn: connection object providing ``lookupByUUIDString``
    :param uuid: uuid string of the guest
    :return: result of ``get_guest_domain_info`` for the domain, or None
             when the lookup raises (the failure is logged)
    """
    try:
        found = conn.lookupByUUIDString(uuid)
    except Exception as e:
        LOG.warning("Failed to get domain for uuid=%s! error=%s" % (uuid, e))
        return None
    return get_guest_domain_info(found)
def do_affine_pci_dev_instance(refresh_need):
    """Set pci device irq affinity for this instance.

    Uses ``inst``, ``self`` and ``wait_for_irqs`` from the enclosing scope.

    :param refresh_need: when true, re-read the instance from nova before
                         affining; otherwise use the enclosing ``inst``
    :return: tuple ``(irqs, msi_irqs, cpulist)`` with the irqs affined
             across all PCI devices and the cpulist of the last device
             processed ('' when there are no devices), or None when the
             refreshed instance cannot be found
    """
    _irqs = set()
    _msi_irqs = set()
    # Bound up front so the return below is valid even when the instance
    # has no PCI devices.
    pci_cpulist = ''

    # Without a refresh, work on the instance from the enclosing scope
    # (previously ``_inst`` was left unbound in that case).
    _inst = inst
    # refresh instance info.
    if refresh_need:
        nova_client = nova_provider.get_nova_client()
        _inst = nova_client.get_instance(inst.uuid)
    if _inst is None:
        return

    numa_topology = _inst.get_numa_topology()
    extra_spec = _inst.get_extra_spec()
    for pci_dev in _inst.pci_devices:
        try:
            irqs, msi_irqs, pci_numa_node, pci_cpulist = \
                pci_utils.set_irqs_affinity_by_pci_address(
                    pci_dev.address, extra_spec, numa_topology)
        except Exception as e:
            irqs = set()
            msi_irqs = set()
            pci_numa_node = None
            pci_cpulist = ''
            LOG.error("Could not affine irqs for pci_addr:%(A)s, "
                      "error: %(E)s" % {"A": pci_dev.address, "E": e})

        # Log irqs affined when there is a change in the counts.
        msi_irq_count = len(msi_irqs)
        # .get(): a device that only shows up after the refresh has no
        # recorded count yet, which simply counts as "changed".
        if ((msi_irq_count != self._msi_irq_count.get(pci_dev.address)) or
                wait_for_irqs):
            self._msi_irq_count[pci_dev.address] = msi_irq_count
            LOG.info(
                ("Instance=%(U)s: IRQs affined for pci_addr=%(A)s, "
                 "dev_id=%(D)s, dev_type=%(T)s, "
                 "vendor_id=%(V)s, product_id=%(P)s, "
                 "irqs=%(I)s, msi_irqs=%(M)s, "
                 "numa_node=%(N)s, cpulist=%(C)s") % {
                     'U': inst.uuid,
                     'A': pci_dev.address,
                     'D': pci_dev.dev_id,
                     'T': pci_dev.dev_type,
                     'V': pci_dev.vendor_id,
                     'P': pci_dev.product_id,
                     'I': ', '.join(map(str, irqs)),
                     'M': ', '.join(map(str, msi_irqs)),
                     'N': pci_numa_node,
                     'C': pci_cpulist
                 })
        _irqs.update(irqs)
        _msi_irqs.update(msi_irqs)
    return (_irqs, _msi_irqs, pci_cpulist)
def affine_pci_dev_instance(self, instance, wait_for_irqs=True):
    """Affine the PCI irqs of a dedicated-CPU instance and record them.

    Nothing is done when ``instance`` is None, is not using the
    'dedicated' cpu policy, or has no PCI devices.

    :param instance: instance object, or None
    :param wait_for_irqs: passed through to the irq driver
    """
    if instance is None:
        return
    if not (instance.get_cpu_policy() == 'dedicated' and
            instance.get_pci_devices()):
        return

    LOG.debug("Instance=%s use dedicated cpu policy!!!" % instance.uuid)
    result = self.affinePciIrqDriver.affine_pci_dev_irqs(instance,
                                                         wait_for_irqs)
    # The driver may return nothing (e.g. the instance is gone); unpacking
    # None would raise TypeError, so there is nothing to record then.
    if result is None:
        LOG.debug("No irq affinity result for instance=%s" % instance.uuid)
        return

    irqs, msi_irqs, cpulist = result
    # record instance on which pci affinity has been applied
    self.instance_irq_pcpulist_update(instance.uuid, irqs, msi_irqs,
                                      cpulist)
def instance_irq_pcpulist_update(self, uuid, irqs, msi_irqs, cpulist):
    """Store the irq bookkeeping entry for an instance.

    When an entry already exists for ``uuid``, irqs present in the old
    entry but missing from the new sets are passed to
    ``reset_irq_affinity`` before the entry is replaced.

    :param uuid: instance uuid
    :param irqs: set of irqs now affined for the instance
    :param msi_irqs: set of msi irqs now affined for the instance
    :param cpulist: cpulist stored alongside the irq sets
    """
    if uuid in self.inst_dict:
        old_irqs, old_msi_irqs = self.inst_dict[uuid][0], self.inst_dict[uuid][1]
        # irqs that were tracked before but are no longer reported
        stale_irqs = old_irqs.difference(irqs)
        stale_msi_irqs = old_msi_irqs.difference(msi_irqs)
        # reset affinity for irqs the instance does not use anymore
        if stale_irqs or stale_msi_irqs:
            self.reset_irq_affinity(uuid, stale_irqs, stale_msi_irqs)

    self.inst_dict[uuid] = [irqs, msi_irqs, cpulist]
    LOG.debug(self.inst_dict)
def get_instance(self, uuid):
    """Build the instance object for ``uuid`` if it runs on this host.

    :param uuid: instance uuid
    :return: instance object (enriched with libvirt domain info when that
             is available), or None when the nova lookup fails or the
             server is hosted elsewhere
    """
    try:
        nova_api = self.get_nova()
        server = nova_api.servers.get(uuid)
        flavor_info = nova_api.flavors.get(server.flavor["id"])
        hostname = server.__dict__['OS-EXT-SRV-ATTR:host']
    except Exception as e:
        LOG.warning("Could not get instance=%s from Nova! error=%s" %
                    (uuid, e))
        return None

    LOG.debug('GET VM:%s in node:%s' % (server.name, hostname))

    if hostname != self._hostname:
        LOG.debug('The VM is not in current host!')
        return None

    result = instance.instance(uuid, server.name, flavor_info.get_keys())
    # numa topology and pci info come from libvirt; a failure there is
    # logged and the instance is still returned
    try:
        domain_info = guest.get_guest_domain_by_uuid(self._conn, uuid)
        if domain_info:
            result.update(domain_info)
    except Exception as e:
        LOG.warning("Failed to access libvirt! error=%s" % e)
    return result
def reset_irq_affinity(self, uuid, irqs=None, msi_irqs=None):
    """Reset irq affinity for instance

    The instance has already been deleted or
    related PCI not used by it anymore.

    :param uuid: instance uuid
    :param irqs: irqs to reset; when neither ``irqs`` nor ``msi_irqs`` is
                 given, the irqs recorded for ``uuid`` are used
    :param msi_irqs: msi irqs to reset (same fallback as ``irqs``)
    """
    if irqs or msi_irqs:
        # reset irq affinity for specified irqs
        _irqs = irqs
        _msi_irqs = msi_irqs
    elif uuid in self.inst_dict:
        # reset all irq affinity for deleted instance
        _irqs = self.inst_dict[uuid][0]
        _msi_irqs = self.inst_dict[uuid][1]
    else:
        LOG.debug("No pci affinity need to be reset for instance=%s!" % uuid)
        return

    try:
        with open('%s/default_smp_affinity' % COMPUTE_IRQ) as f:
            cpulist = f.readline().strip()
        LOG.debug("default smp affinity bitmap:%s" % cpulist)

        for x in [_irqs, _msi_irqs]:
            # Either collection may be None (only one of the two was
            # supplied) or empty; a truthiness test covers both.
            if x:
                pci_utils.set_irq_affinity(True, x, cpulist)
    except Exception as e:
        LOG.error("Failed to reset smp affinity! error=%s" % e)
    else:
        # Report completion only when no error was raised above.
        LOG.info("Reset smp affinity done for instance=%s!" % uuid)
def process_main():
    """Entry function for PCI Interrupt Affinity Agent

    Installs the SIGTSTP handler, starts the nova client, audits and
    rabbitmq listener when openstack is enabled, then idles until
    ``stay_on`` is cleared. Unexpected errors exit with status 200.
    """
    LOG.info("Enter PCIInterruptAffinity Agent")

    # Bind everything the finally clause reads before entering the try, so
    # a failure during start-up cannot turn into an UnboundLocalError that
    # hides the original error and the exit status.
    openstack_enabled = False
    nova_client = None
    audit_srv = None
    rabbit_client = None

    try:
        # NOTE(review): only SIGTSTP is registered here although the
        # handler also recognises SIGTERM and SIGINT - confirm intent.
        signal.signal(signal.SIGTSTP, process_signal_handler)
        openstack_enabled = CONF.openstack.openstack_enabled
        if openstack_enabled:
            nova_client = nova_provider.get_nova_client()
            audit_srv = audits_initialize()
            rabbit_client = start_rabbitmq_client()

        while stay_on:
            time.sleep(1)

    except KeyboardInterrupt:
        LOG.info("keyboard Interrupt received.")
    except Exception as e:
        LOG.info("%s" % e)
        sys.exit(200)

    finally:
        LOG.error("process_main finalized!!!")
        if openstack_enabled:
            del nova_client
            # Stop only what was actually started.
            if audit_srv is not None:
                audit_srv.tg.stop()
            if rabbit_client is not None:
                rabbit_client.stop()
def info(self, ctxt, publisher_id, event_type, payload, metadata):
    """Handle an instance-offline notification.

    The notification is requeued when the decoded instance host is set and
    differs from this host (``COMPUTE_HOSTNAME`` or the socket hostname).
    Otherwise, if an instance uuid can be decoded, its irq affinity is
    reset.

    :return: ``NotificationResult.REQUEUE`` for another host's instance,
             otherwise None
    """
    decoder = self.payload_decoder
    instance_host = decoder.decode_instance_host(payload)
    current_host = os.getenv("COMPUTE_HOSTNAME",
                             default=socket.gethostname())

    hosted_elsewhere = not (instance_host is None or
                            instance_host == current_host)
    if hosted_elsewhere:
        LOG.debug(
            "Requeue notification: instance_host=%s != current_host=%s" %
            (instance_host, current_host))
        return oslo_messaging.NotificationResult.REQUEUE

    instance_uuid = decoder.decode_instance_uuid(payload)
    if not instance_uuid:
        return

    LOG.info(
        "Instance offline: uuid=%s, instance_host=%s, event_type=%s" %
        (instance_uuid, instance_host, event_type))
    affinity.pci_irq_affinity.reset_irq_affinity(instance_uuid)
def _get_pci_irq_affinity_mask(extra_spec):
    """Parse pci irq affinity mask based on flavor extra-spec.

    :param extra_spec: flavor extra-spec mapping; None or a mapping without
                       'hw:pci_irq_affinity_mask' means "no mask"
    :return: set of vcpu ids with corresponding pci irq affinity mask, or
             None when no mask is configured
    :raises Exception: when the mask is present but parses to no CPUs
    """
    # extra_spec can be None (callers default it to None); treat that the
    # same as a spec without the key instead of failing on "in None".
    if not extra_spec or 'hw:pci_irq_affinity_mask' not in extra_spec:
        LOG.info('Not set pci_irq_affinity_mask!')
        return None

    pci_irq_affinity_mask = extra_spec['hw:pci_irq_affinity_mask']
    LOG.info("pci_irq_affinity_mask: %s" % pci_irq_affinity_mask)

    cpuset_ids = parse_cpu_spec(pci_irq_affinity_mask)
    if not cpuset_ids:
        raise Exception("No CPUs available after parsing %r" %
                        pci_irq_affinity_mask)
    return cpuset_ids
def find_and_fill_pci_addrs(dom_xml, device_type):
    """Collect the PCI addresses of vfio-driven devices of one type.

    Addresses are added to ``pci_addrs`` from the enclosing scope.

    :param dom_xml: parsed domain XML element
    :param device_type: device tag name under ``./devices``
    """
    LOG.debug("Finding pci_addrs for %s devices" % device_type)

    def parse_pci_addr(tag):
        # Attributes are hex strings; format as dddd:bb:ss.f
        return "%04x:%02x:%02x.%01x" % (
            int(tag.get('domain'), base=16),
            int(tag.get('bus'), base=16),
            int(tag.get('slot'), base=16),
            int(tag.get('function'), base=16))

    for node in dom_xml.findall('./devices/' + device_type):
        for driver in node.findall('driver'):
            # A <driver> element without a name attribute yields None from
            # get(); treat it as "not vfio" instead of crashing.
            if not (driver.get('name') or '').startswith('vfio'):
                continue
            addr_tag = node.find('source/address')
            # find() returns None when the device has no source address.
            if addr_tag is None:
                continue
            if (addr_tag.get('type') == 'pci' or
                    node.get('type') == 'pci'):
                pci_addr = parse_pci_addr(addr_tag)
                LOG.debug("Add pci device: %s" % pci_addr)
                pci_addrs.update([pci_addr])
def update(self, domain):
    """Refresh numa topology, cpu policy and pci devices from domain info.

    :param domain: mapping with the keys 'nodelist', 'nr_vcpus',
                   'cpu_pinning', 'IsCpuPinned' and 'pci_addrs'
    """
    cells = set()
    for node_id in domain['nodelist']:
        # each cell is built with the full vcpu range and pinning map
        cells.add(numa_cell(node_id,
                            list(range(domain['nr_vcpus'])),
                            domain['cpu_pinning']))
        LOG.debug("cell_id=%s, vcpuset=%s, cpu_pinning=%s" %
                  (node_id, list(range(domain['nr_vcpus'])),
                   domain['cpu_pinning']))

    self.numa_topology = numa_topology(self.uuid, cells)
    self.cpu_policy = 'dedicated' if domain['IsCpuPinned'] else 'shared'

    for pci_addr in domain['pci_addrs']:
        self.pci_devices.update([pci_device(pci_addr)])
def _wait_for_msi_irqs(self, inst):
    """Check if each pci device has the expected number of msi irqs.

    Updates the per-address count, elapsed and "unchanged since" trackers.

    :param inst: instance whose ``pci_devices`` are polled
    :raises loopingcall.LoopingCallDone: when every device has msi irqs
        whose count has been stable for ``msi_irq_since``, or when the
        ``msi_irq_timeout`` has been reached for every device
    """
    previous_counts = self._msi_irq_count.copy()
    addrs = set()

    for pci_dev in inst.pci_devices:
        addr = pci_dev.address
        addrs.add(addr)
        try:
            _, msi_irqs = pci_utils.get_irqs_by_pci_address(addr)
        except Exception as e:
            msi_irqs = set()
            LOG.error('_wait_for_msi_irqs: pci_addr=%(A)s, error=%(E)s' %
                      {'A': addr, 'E': e})

        current = len(msi_irqs)
        self._msi_irq_count[addr] = current
        self._msi_irq_elapsed[addr] += \
            CONF.parameters.msi_irq_check_interval
        if previous_counts[addr] != current:
            self._msi_irq_since[addr] = 0
        else:
            self._msi_irq_since[addr] += \
                CONF.parameters.msi_irq_check_interval

    def is_settled(addr):
        # has msi irqs and the count has been unchanged long enough
        return (self._msi_irq_count[addr] > 0 and
                self._msi_irq_since[addr] >= CONF.parameters.msi_irq_since)

    # Done when msi irq counts have not changed for some time
    if all(is_settled(k) for k in addrs):
        raise loopingcall.LoopingCallDone()

    # Abort due to timeout
    timed_out = all(
        self._msi_irq_elapsed[k] >= CONF.parameters.msi_irq_timeout
        for k in addrs)
    if timed_out:
        msg = ("reached %(timeout)s seconds timeout, waiting for "
               "msi irqs of pci_addrs: %(addrs)s") % {
                   'timeout': CONF.parameters.msi_irq_timeout,
                   'addrs': list(addrs)
               }
        LOG.warning(msg)
        raise loopingcall.LoopingCallDone()
def get_pci_irqs_pinned_cpuset(extra_spec=None,
                               numa_topology=None,
                               pci_numa_node=None):
    """Get pinned cpuset where pci irq are affined.

    :param extra_spec: extra_spec
    :param pci_numa_node: numa node of a specific PCI device
    :param numa_topology: instance numa topology
    :return: cpuset, cpulist
    """
    LOG.debug("extra_spec:%s, topo:%s, numa_node:%s" %
              (extra_spec, numa_topology, pci_numa_node))

    # nothing to compute without a topology and a valid numa node
    if numa_topology is None or pci_numa_node is None or pci_numa_node < 0:
        return (set(), '')

    # Determine full affinity cpuset, but restrict to pci's numa node
    cpuset = set()
    for cell in numa_topology.cells:
        if cell.id != pci_numa_node or cell.cpu_pinning is None:
            continue
        cpuset.update(cell.cpu_pinning.values())
    LOG.info("pinning pcpu list:%s" % cpuset)

    # Use extra-spec hw:pci_irq_affinity_mask only when the instance is
    # pinned.
    if cpuset:
        masked_vcpus = _get_pci_irq_affinity_mask(extra_spec)
        if masked_vcpus:
            cpuset = set()
            for cell in numa_topology.cells:
                if cell.cpu_pinning is None:
                    continue
                for vcpu in cell.cpuset:
                    if vcpu in masked_vcpus:
                        _, pcpu = numa_topology.vcpu_to_pcpu(vcpu)
                        cpuset.add(pcpu)

    return (cpuset, list_to_range(input_list=list(cpuset)))
def _get_keystone_creds(self):
    """Collect keystone credentials from ``CONF.openstack``.

    Only options with a non-empty value are kept. When no password is
    configured it is fetched from the keyring, using the configured
    keyring service and username.

    :return: dict of credentials without the 'keyring_service' entry, or
             None when the configuration cannot be read (the error is
             logged)
    """
    creds = {}
    openstack_options = CONF.openstack
    creds_options = [
        'username', 'password', 'user_domain_name', 'project_name',
        'project_domain_name', 'keyring_service', 'auth_url'
    ]

    try:
        for option in creds_options:
            value = openstack_options[option]
            if value:
                creds[option] = value

        if 'password' not in creds:
            creds['password'] = keyring.get_password(
                creds['keyring_service'], creds['username'])
        # keyring_service is absent when it is not configured (a password
        # was given directly); a default avoids a KeyError that would
        # discard otherwise valid credentials.
        creds.pop('keyring_service', None)

    except Exception as e:
        LOG.error("Could not get keystone creds configuration! Err=%s" % e)
        creds = None

    return creds
def query_instance_callback(inst):
    """Apply PCI irq affinity for an instance returned by a query.

    :param inst: instance object passed on to ``affine_pci_dev_instance``
    """
    LOG.debug("query inst:%s" % inst)
    manager = affinity.pci_irq_affinity
    manager.affine_pci_dev_instance(inst)
def get_irqs_by_pci_address(pci_addr):
    """Get list of PCI IRQs based on a VF's pci address

    Raises PciDeviceNotFoundById in case the pci device is not found,
    or when there is an underlying problem getting associated irqs.

    Only irqs that have a directory under COMPUTE_IRQ are returned. When
    the device's msi_irqs directory does not exist, two empty sets are
    returned.

    :param pci_addr: PCI address
    :return: irqs, msi_irqs
    """
    dev_path = "%s/%s" % (COMPUTE_PCI_DEVICES, pci_addr)
    if not os.path.isdir(dev_path):
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # legacy irq: positive integers on the first line of <dev>/irq
    irq_path = "%s/irq" % (dev_path)
    try:
        with open(irq_path) as f:
            found_irqs = set(
                int(x) for x in f.readline().split() if int(x) > 0)
    except Exception as e:
        LOG.error(
            'get_irqs_by_pci_address: '
            'pci_addr=%(A)s: irq_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': irq_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # msi irqs: positive integer entry names in <dev>/msi_irqs
    msi_path = "%s/msi_irqs" % (dev_path)
    try:
        found_msi_irqs = set(
            int(x) for x in os.listdir(msi_path) if int(x) > 0)
    except Exception as e:
        # msi_path disappears during configuration; do not treat
        # non-existence as fatal
        if isinstance(e, OSError) and e.errno == errno.ENOENT:
            return (set(), set())
        LOG.error(
            'get_irqs_by_pci_address: '
            'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': msi_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    def only_configured(candidates):
        # Return only configured irqs, ignore any that are missing.
        return set(irq for irq in candidates
                   if os.path.isdir("%s/%s" % (COMPUTE_IRQ, irq)))

    return (only_configured(found_irqs), only_configured(found_msi_irqs))
def set_irqs_affinity_by_pci_address(pci_addr,
                                     extra_spec=None,
                                     numa_topology=None):
    """Set cpu affinity for list of PCI IRQs with a VF's pci address,

    Restrict cpuset to the numa node of the PCI.
    Return list
    Raises PciDeviceNotFoundById in case the pci device is not found,
    or when there is an underlying problem getting associated irqs.

    :param pci_addr: PCI address
    :param extra_spec: extra_spec
    :param numa_topology: instance numa topology
    :return: irqs, msi_irqs, numa_node, cpulist
    """
    # Without a topology nothing is looked up or written.
    if numa_topology is None:
        return (set(), set(), None, '')

    # Get the irqs associated with pci addr
    found_irqs, found_msi_irqs = get_irqs_by_pci_address(pci_addr)
    LOG.debug("pci: %s, irqs: %s, msi_irqs: %s" %
              (pci_addr, found_irqs, found_msi_irqs))

    # Obtain physical numa_node for this pci addr
    numa_path = "%s/%s/numa_node" % (COMPUTE_PCI_DEVICES, pci_addr)
    try:
        with open(numa_path) as f:
            parsed = [int(x) for x in f.readline().split()]
            numa_node = parsed[0]
    except Exception as e:
        LOG.error(
            'set_irqs_affinity_by_pci_address: '
            'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': numa_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Skip irq configuration if there is no associated numa node
    if numa_node is None or numa_node < 0:
        return (set(), set(), numa_node, '')

    # Determine the pinned cpuset where irqs are to be affined
    cpuset, cpulist = get_pci_irqs_pinned_cpuset(extra_spec, numa_topology,
                                                 numa_node)
    LOG.debug("cpuset where irqs are to be affined:%s or %s" %
              (cpuset, cpulist))

    # Skip irq configuration if there are no pinned cpus
    if not cpuset:
        return (set(), set(), numa_node, cpulist)

    # Set IRQ affinity, but do not treat errors as fatal.
    LOG.debug("Setting affinity %s for irqs: %s and msi_irqs: %s" %
              (cpulist, found_irqs, found_msi_irqs))
    affined_irqs = set_irq_affinity(False, found_irqs, cpulist)
    affined_msi_irqs = set_irq_affinity(False, found_msi_irqs, cpulist)
    return (affined_irqs, affined_msi_irqs, numa_node, cpulist)