def power_on(self):
    if not os.path.exists(self.xml_file):
        db.enqueue_instance_delete(
            config.parsed.get('NODE_NAME'), self.db_entry['uuid'], 'error',
            'missing domain file in power on')

    libvirt = util.get_libvirt()
    with open(self.xml_file) as f:
        xml = f.read()

    instance = self._get_domain()
    if not instance:
        conn = libvirt.open(None)
        instance = conn.defineXML(xml)
    if not instance:
        db.enqueue_instance_delete(
            config.parsed.get('NODE_NAME'), self.db_entry['uuid'], 'error',
            'power on failed to create domain')
        raise exceptions.NoDomainException()

    try:
        instance.create()
    except libvirt.libvirtError as e:
        if not str(e).startswith(
                'Requested operation is not valid: domain is already running'):
            logutil.warning([self], 'Instance start error: %s' % e)
            return False

    instance.setAutostart(1)
    db.update_instance_power_state(
        self.db_entry['uuid'],
        util.extract_power_state(libvirt, instance))
    db.add_event(
        'instance', self.db_entry['uuid'], 'poweron', 'complete', None, None)
    return True
def is_created(self):
    """Attempt to ensure network has been created successfully"""
    subst = self.subst_dict()

    if not util.check_for_interface(subst['vx_bridge'], up=True):
        logutil.warning([self], '%s is not up' % subst['vx_bridge'])
        return False

    return True
def is_dnsmasq_running(self):
    """Determine if dnsmasq process is running for this network"""
    subst = self.subst_dict()
    d = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])

    pid = d.get_pid()
    if pid and psutil.pid_exists(pid):
        return True

    logutil.warning([self], 'dnsmasq is not running')
    return False
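# Hedged sketch of a pidfile-based get_pid() like the one dhcp.DHCP is assumed
# to provide above: dnsmasq is typically started with --pid-file, and liveness
# then reduces to "the recorded pid still exists". The pidfile path argument is
# illustrative; this is not the project's actual helper.
def get_dnsmasq_pid(pidfile_path):
    if not os.path.exists(pidfile_path):
        return None
    with open(pidfile_path) as f:
        contents = f.read().strip()
    return int(contents) if contents else None


def dnsmasq_alive(pidfile_path):
    pid = get_dnsmasq_pid(pidfile_path)
    return bool(pid and psutil.pid_exists(pid))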
def _restart_queue(queuename):
    queue_path = _construct_key('processing', queuename, None)

    with get_lock('queue', None, queuename):
        for data, metadata in Etcd3Client().get_prefix(
                queue_path, sort_order='ascend'):
            jobname = str(metadata.key).split('/')[-1].rstrip("'")
            workitem = json.loads(data)
            put('queue', queuename, jobname, workitem)
            delete('processing', queuename, jobname)
            logutil.warning(
                None, 'Reset %s workitem %s' % (queuename, jobname))
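# Hedged aside on the key parsing above: str(metadata.key) produces the repr of
# a bytes object (e.g. "b'/.../jobname'"), which is why the trailing quote is
# stripped. Assuming the client really does return bytes keys, decoding them is
# the more direct way to recover the job name; the example key is illustrative,
# not the project's actual key layout.
def _jobname_from_key(key):
    if isinstance(key, bytes):
        key = key.decode('utf-8')
    return key.split('/')[-1]


# e.g. _jobname_from_key(b'/processing/instancequeue/jobname') == 'jobname'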
def _audit_daemons():
    running_daemons = []
    for pid in DAEMON_PIDS:
        running_daemons.append(DAEMON_PIDS[pid])

    logutil.info(
        None, 'Daemons running: %s' % ', '.join(sorted(running_daemons)))

    for d in DAEMON_IMPLEMENTATIONS:
        if d not in running_daemons:
            _start_daemon(d)

    # Iterate over a copy, as _start_daemon() adds entries to DAEMON_PIDS
    # while we are looping.
    for d in list(DAEMON_PIDS):
        if not psutil.pid_exists(d):
            logutil.warning(
                None, '%s pid is missing, restarting' % DAEMON_PIDS[d])
            _start_daemon(DAEMON_PIDS[d])
def clear_stale_locks():
    # Remove all locks held by former processes on this node. This is required
    # after an unclean restart, otherwise we need to wait for these locks to
    # timeout and that can take a long time.
    client = Etcd3Client()

    for data, metadata in client.get_prefix(
            '/sflocks/', sort_order='ascend', sort_target='key'):
        lockname = str(metadata['key']).replace('/sflocks/', '')
        holder = json.loads(data)
        node = holder['node']
        pid = int(holder['pid'])

        if (node == config.parsed.get('NODE_NAME')
                and not psutil.pid_exists(pid)):
            client.delete(metadata['key'])
            logutil.warning(
                None,
                'Removed stale lock for %s, previously held by pid %s on %s'
                % (lockname, pid, node))
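# Hedged sketch of the lock-record shape clear_stale_locks() relies on: each
# key under /sflocks/ holds a JSON document naming the holder's node and pid,
# so "stale" means "held on this node by a pid that no longer exists". The
# helper name and example record below are illustrative, not from the project.
def _lock_is_stale(holder, local_node):
    return (holder.get('node') == local_node
            and not psutil.pid_exists(int(holder['pid'])))


# e.g. _lock_is_stale({'node': 'sf-1', 'pid': 12345}, 'sf-1') becomes True
# once pid 12345 has exited on node sf-1.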
def create(self, lock=None):
    db.update_instance_state(self.db_entry['uuid'], 'creating')

    # Ensure we have state on disk
    if not os.path.exists(self.instance_path):
        logutil.debug(
            [self], 'Creating instance storage at %s' % self.instance_path)
        os.makedirs(self.instance_path)

    # Generate a config drive
    with util.RecordedOperation('make config drive', self):
        self._make_config_drive(os.path.join(
            self.instance_path,
            self.db_entry['block_devices']['devices'][1]['path']))

    # Prepare disks
    if not self.db_entry['block_devices']['finalized']:
        modified_disks = []
        for disk in self.db_entry['block_devices']['devices']:
            if disk.get('base'):
                img = images.Image(disk['base'])
                hashed_image_path = img.get([lock], self)

                with util.RecordedOperation('detect cdrom images', self):
                    try:
                        cd = pycdlib.PyCdlib()
                        cd.open(hashed_image_path)
                        disk['present_as'] = 'cdrom'
                    except Exception:
                        pass

                if disk.get('present_as', 'cdrom') == 'cdrom':
                    # There is no point in resizing or COW'ing a cdrom
                    disk['path'] = disk['path'].replace('.qcow2', '.raw')
                    disk['type'] = 'raw'
                    disk['snapshot_ignores'] = True

                    try:
                        os.link(hashed_image_path, disk['path'])
                    except OSError:
                        # Different filesystems
                        util.execute(
                            [lock],
                            'cp %s %s' % (hashed_image_path, disk['path']))

                    # Due to limitations in some installers, cdroms are always
                    # on IDE
                    disk['device'] = 'hd%s' % disk['device'][-1]
                    disk['bus'] = 'ide'
                else:
                    with util.RecordedOperation('resize image', self):
                        resized_image_path = images.resize(
                            [lock], hashed_image_path, disk['size'])

                    if config.parsed.get('DISK_FORMAT') == 'qcow':
                        with util.RecordedOperation(
                                'create copy on write layer', self):
                            images.create_cow(
                                [lock], resized_image_path, disk['path'])

                        # Record the backing store for modern libvirts
                        disk['backing'] = (
                            '<backingStore type=\'file\'>\n'
                            '  <format type=\'qcow2\'/>\n'
                            '  <source file=\'%s\'/>\n'
                            '</backingStore>' % resized_image_path)

                    elif config.parsed.get('DISK_FORMAT') == 'qcow_flat':
                        with util.RecordedOperation('create flat layer', self):
                            images.create_flat(
                                [lock], resized_image_path, disk['path'])

                    elif config.parsed.get('DISK_FORMAT') == 'flat':
                        with util.RecordedOperation('create raw disk', self):
                            images.create_raw(
                                [lock], resized_image_path, disk['path'])

                    else:
                        raise Exception('Unknown disk format')

            elif not os.path.exists(disk['path']):
                util.execute(None, 'qemu-img create -f qcow2 %s %sG'
                             % (disk['path'], disk['size']))

            modified_disks.append(disk)

        self.db_entry['block_devices']['devices'] = modified_disks
        self.db_entry['block_devices']['finalized'] = True

    db.persist_block_devices(
        self.db_entry['uuid'], self.db_entry['block_devices'])

    # Create the actual instance
    with util.RecordedOperation('create domain XML', self):
        self._create_domain_xml()

    # Sometimes on Ubuntu 20.04 we need to wait for port binding to work.
    # Revisiting this is tracked by issue 320 on github.
    with util.RecordedOperation('create domain', self):
        if not self.power_on():
            attempts = 0
            while not self.power_on() and attempts < 100:
                logutil.warning(
                    [self],
                    'Instance required an additional attempt to power on')
                time.sleep(5)
                attempts += 1

    if self.is_powered_on():
        logutil.info([self], 'Instance now powered on')
    else:
        logutil.info([self], 'Instance failed to power on')

    db.update_instance_state(self.db_entry['uuid'], 'created')
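# Hedged sketch of the disk layering the qcow path above relies on: the resized
# base image acts as a read-only backing file and the instance disk is a small
# qcow2 overlay on top of it. This is an assumption about what a helper like
# images.create_cow() runs, not the project's actual implementation, and it
# assumes the resized base image is itself qcow2.
import subprocess


def create_cow_overlay(backing_path, disk_path):
    subprocess.check_call([
        'qemu-img', 'create',
        '-f', 'qcow2',        # format of the overlay we are creating
        '-b', backing_path,   # read-only backing (base) image
        '-F', 'qcow2',        # backing file format, required by newer qemu-img
        disk_path])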
def main():
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    setproctitle.setproctitle(daemon.process_name('main'))

    # Log configuration on startup
    for key in config.parsed.config:
        logutil.info(
            None, 'Configuration item %s = %s' % (key, config.parsed.get(key)))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items
    db.clear_stale_locks()
    db.see_this_node()
    db.restart_queues()

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    pid = os.fork()
    if pid == 0:
        LOG.removeHandler(HANDLER)
        DAEMON_IMPLEMENTATIONS['resources'].Monitor('resources').run()
    DAEMON_PIDS[pid] = 'resources'
    logutil.info(None, 'resources pid is %d' % pid)

    # If I am the network node, I need some setup
    if util.is_network_node():
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.parsed.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge': 'phy-br-%s' % config.parsed.get('NODE_EGRESS_NIC'),
            'physical_nic': config.parsed.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and then wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util.RecordedOperation('create physical bridge', 'startup'):
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                util.execute(
                    None, 'ip link add %(physical_bridge)s type bridge' % subst)
                util.execute(
                    None, 'ip link set %(physical_bridge)s up' % subst)
                util.execute(
                    None,
                    'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(physical_bridge)s' % subst)

                util.execute(
                    None,
                    'iptables -A FORWARD -o %(physical_nic)s '
                    '-i %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None,
                    'iptables -A FORWARD -i %(physical_nic)s '
                    '-o %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None,
                    'iptables -t nat -A POSTROUTING '
                    '-o %(physical_nic)s -j MASQUERADE' % subst)

    def _start_daemon(d):
        pid = os.fork()
        if pid == 0:
            LOG.removeHandler(HANDLER)
            DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
        DAEMON_PIDS[pid] = d
        logutil.info(None, '%s pid is %d' % (d, pid))

    def _audit_daemons():
        running_daemons = []
        for pid in DAEMON_PIDS:
            running_daemons.append(DAEMON_PIDS[pid])

        logutil.info(
            None, 'Daemons running: %s' % ', '.join(sorted(running_daemons)))

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        # Iterate over a copy, as _start_daemon() adds entries to DAEMON_PIDS
        # while we are looping.
        for d in list(DAEMON_PIDS):
            if not psutil.pid_exists(d):
                logutil.warning(
                    None, '%s pid is missing, restarting' % DAEMON_PIDS[d])
                _start_daemon(DAEMON_PIDS[d])

    _audit_daemons()
    restore_instances()

    while True:
        time.sleep(10)

        wpid, _ = os.waitpid(-1, os.WNOHANG)
        while wpid != 0:
            logutil.warning(
                None, '%s died (pid %d)'
                % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
            del DAEMON_PIDS[wpid]
            wpid, _ = os.waitpid(-1, os.WNOHANG)

        _audit_daemons()
        db.see_this_node()
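# Hedged sketch of the supervision pattern main() relies on, reduced to the
# standard library: fork one child per daemon, remember its pid, and reap any
# child that exits with a non-blocking waitpid() so it can be restarted. The
# names here (run_forever, supervise, children) are illustrative only, and the
# sketch assumes at least one daemon name is supplied.
def run_forever(name):
    while True:
        time.sleep(60)


def supervise(daemon_names):
    children = {}

    def start(name):
        pid = os.fork()
        if pid == 0:
            run_forever(name)   # the child never returns from here
        children[pid] = name    # the parent records pid -> daemon name

    for name in daemon_names:
        start(name)

    while True:
        time.sleep(10)

        wpid, _ = os.waitpid(-1, os.WNOHANG)
        while wpid != 0:
            name = children.pop(wpid, None)
            if name:
                start(name)     # restart whichever daemon just died
            wpid, _ = os.waitpid(-1, os.WNOHANG)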
def _update_power_states(self):
    libvirt = util.get_libvirt()
    conn = libvirt.open(None)
    try:
        seen = []

        # Active VMs have an ID. Active means running in libvirt land.
        for domain_id in conn.listDomainsID():
            domain = conn.lookupByID(domain_id)
            if not domain.name().startswith('sf:'):
                continue

            instance_uuid = domain.name().split(':')[1]
            instance = db.get_instance(instance_uuid)
            if not instance:
                # Instance is SF but not in database. Kill to reduce load.
                logutil.warning([virt.ThinInstance(instance_uuid)],
                                'Destroying unknown instance')
                util.execute(None, 'virsh destroy "sf:%s"' % instance_uuid)
                continue

            db.place_instance(instance_uuid, config.parsed.get('NODE_NAME'))
            seen.append(domain.name())

            if instance.get('state') == 'deleted':
                # NOTE(mikal): a delete might be in-flight in the queue.
                # We only worry about instances which should have gone
                # away five minutes ago.
                if time.time() - instance['state_updated'] < 300:
                    continue

                db.instance_enforced_deletes_increment(instance_uuid)
                attempts = instance.get('enforced_deletes', 0)

                if attempts > 5:
                    # Sometimes we just can't delete the VM. Try the big
                    # hammer instead.
                    logutil.warning(
                        [virt.ThinInstance(instance_uuid)],
                        'Attempting alternate delete method for instance')
                    util.execute(None, 'virsh destroy "sf:%s"' % instance_uuid)
                    db.add_event('instance', instance_uuid, 'enforced delete',
                                 'complete', None, None)
                else:
                    i = virt.from_db(instance_uuid)
                    i.delete()
                    i.update_instance_state('deleted')
                    logutil.warning(
                        [virt.ThinInstance(instance_uuid)],
                        'Deleting stray instance (attempt %d)' % attempts)

                continue

            state = util.extract_power_state(libvirt, domain)
            db.update_instance_power_state(instance_uuid, state)
            if state == 'crashed':
                db.update_instance_state(instance_uuid, 'error')

        # Inactive VMs just have a name, and are powered off in our state
        # system.
        for domain_name in conn.listDefinedDomains():
            if not domain_name.startswith('sf:'):
                continue

            if domain_name not in seen:
                instance_uuid = domain_name.split(':')[1]
                instance = db.get_instance(instance_uuid)

                if instance.get('state') == 'deleted':
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if time.time() - instance['state_updated'] < 300:
                        continue

                    domain = conn.lookupByName(domain_name)
                    domain.undefine()
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected stray instance')
                    db.add_event('instance', instance_uuid, 'deleted stray',
                                 'complete', None, None)
                    continue

                db.place_instance(
                    instance_uuid, config.parsed.get('NODE_NAME'))
                instance_path = os.path.join(
                    config.parsed.get('STORAGE_PATH'), 'instances',
                    instance_uuid)

                if not os.path.exists(instance_path):
                    # If we're inactive and our files aren't on disk, we
                    # have a problem.
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected error state for instance')
                    db.update_instance_state(instance_uuid, 'error')
                elif instance.get('power_state') != 'off':
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected power off for instance')
                    db.update_instance_power_state(instance_uuid, 'off')
                    db.add_event('instance', instance_uuid,
                                 'detected poweroff', 'complete', None, None)

    except libvirt.libvirtError as e:
        logutil.error(None, 'Failed to lookup all domains: %s' % e)
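# Hedged sketch of what a helper like util.extract_power_state() plausibly
# does: translate libvirt's numeric domain state into the strings used above
# ('crashed', 'off', and so on). This is an assumption about the helper, not
# the project's actual implementation, and the returned strings are only those
# implied by the calling code.
def extract_power_state(libvirt, domain):
    state, _reason = domain.state()

    if state == libvirt.VIR_DOMAIN_CRASHED:
        return 'crashed'
    if state in (libvirt.VIR_DOMAIN_PAUSED, libvirt.VIR_DOMAIN_PMSUSPENDED):
        return 'paused'
    if state == libvirt.VIR_DOMAIN_SHUTOFF:
        return 'off'

    # running, blocked, shutting down, or no state reported
    return 'on'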