# Fencing helper; zkhandler, logger, and node_name (the fenced node) are
# provided by the enclosing scope in which this function is defined.
def fence_migrate_vm(dom_uuid):
    VMInstance.flush_locks(zkhandler, logger, dom_uuid)

    target_node = common.findTargetNode(zkhandler, dom_uuid)

    if target_node is not None:
        logger.out(
            'Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node),
            state="i",
        )
        zkhandler.write(
            [
                (("domain.state", dom_uuid), "start"),
                (("domain.node", dom_uuid), target_node),
                (("domain.last_node", dom_uuid), node_name),
            ]
        )
    else:
        logger.out(
            'No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(
                dom_uuid
            ),
            state="i",
        )
        zkhandler.write(
            [
                (("domain.state", dom_uuid), "stopped"),
                (("domain.meta.autostart", dom_uuid), "True"),
            ]
        )
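# A minimal sketch (not the project's verbatim fencing driver) of how
# fence_migrate_vm is likely driven: it would be nested inside the routine that
# handles a fenced node, so zkhandler, logger, and node_name close over that
# routine's variables, and it runs once per VM recorded under
# "node.running_domains" for the dead node. The function name and the final
# state value written here are assumptions.
def migrate_from_fenced_node_sketch(zkhandler, logger, node_name):
    # fence_migrate_vm (above) would be defined in this scope in practice so it
    # can reference zkhandler, logger, and node_name
    fenced_domains = zkhandler.read(("node.running_domains", node_name)).split()
    for dom_uuid in fenced_domains:
        fence_migrate_vm(dom_uuid)

    # Record that the fenced node's VMs have been handled
    zkhandler.write([(("node.state.domain", node_name), "fenced")])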
    def flush(self):
        # Begin flush
        self.logger.out(
            'Flushing node "{}" of running VMs'.format(self.name), state="i"
        )
        self.logger.out("VM list: {}".format(", ".join(self.domain_list)), state="i")
        fixed_domain_list = self.domain_list.copy()
        for dom_uuid in fixed_domain_list:
            # Allow us to cancel the operation
            if self.flush_stopper:
                self.logger.out("Aborting node flush", state="i")
                self.flush_event.set()
                self.flush_thread = None
                self.flush_stopper = False
                return

            self.logger.out(
                'Selecting target to migrate VM "{}"'.format(dom_uuid), state="i"
            )

            # Don't replace the previous node if the VM is already migrated
            if self.zkhandler.read(("domain.last_node", dom_uuid)):
                current_node = self.zkhandler.read(("domain.last_node", dom_uuid))
            else:
                current_node = self.zkhandler.read(("domain.node", dom_uuid))

            target_node = common.findTargetNode(self.zkhandler, dom_uuid)
            if target_node == current_node:
                target_node = None

            if target_node is None:
                self.logger.out(
                    'Failed to find migration target for VM "{}"; shutting down and setting autostart flag'.format(
                        dom_uuid
                    ),
                    state="e",
                )
                self.zkhandler.write(
                    [
                        (("domain.state", dom_uuid), "shutdown"),
                        (("domain.meta.autostart", dom_uuid), "True"),
                    ]
                )
            else:
                self.logger.out(
                    'Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node),
                    state="i",
                )
                self.zkhandler.write(
                    [
                        (("domain.state", dom_uuid), "migrate"),
                        (("domain.node", dom_uuid), target_node),
                        (("domain.last_node", dom_uuid), current_node),
                    ]
                )

            # Wait for the VM to migrate so the next VM's free RAM count is
            # accurate (migrations run serially anyway)
            ticks = 0
            self.logger.out(
                'Waiting for migration of VM "{}"'.format(dom_uuid), state="i"
            )
            while self.zkhandler.read(("domain.state", dom_uuid)) in [
                "migrate",
                "unmigrate",
                "shutdown",
            ]:
                ticks += 1
                if ticks > 600:
                    # Abort the wait after 120 seconds; the VM is stuck in a bad
                    # state, so move on to the next one
                    break
                time.sleep(0.2)

        self.zkhandler.write(
            [
                (("node.running_domains", self.name), ""),
                (("node.state.domain", self.name), "flushed"),
            ]
        )
        self.flush_thread = None
        self.flush_stopper = False
        return
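# A minimal sketch, assuming a NodeInstance-style object like the one the
# flush() method above belongs to, of how flush() could be run as a cancellable
# background thread. The helper names start_flush_sketch and cancel_flush_sketch
# are hypothetical; the attribute names (flush_thread, flush_stopper,
# flush_event) mirror those used by flush() above.
from threading import Event, Thread


def start_flush_sketch(node):
    node.flush_event = Event()
    node.flush_stopper = False
    node.flush_thread = Thread(target=node.flush, args=(), kwargs={})
    node.flush_thread.start()


def cancel_flush_sketch(node):
    # Ask the flush loop to stop at its next iteration, then wait for the abort
    # branch in flush() to set flush_event as acknowledgement
    node.flush_stopper = True
    node.flush_event.wait()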
def move_vm(zkhandler, domain, target_node, wait=False, force_live=False):
    # Validate that VM exists in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    # Get state and verify we're OK to proceed
    current_state = zkhandler.read(("domain.state", dom_uuid))
    if current_state != "start":
        # If the current state isn't start, preserve it; we're not doing live migration
        target_state = current_state
    else:
        if force_live:
            target_state = "migrate-live"
        else:
            target_state = "migrate"

    current_node = zkhandler.read(("domain.node", dom_uuid))

    if not target_node:
        target_node = common.findTargetNode(zkhandler, dom_uuid)
    else:
        # Verify node is valid
        valid_node = common.verifyNode(zkhandler, target_node)
        if not valid_node:
            return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)

        # Check if node is within the limit
        node_limit = zkhandler.read(("domain.meta.node_limit", dom_uuid))
        if node_limit and target_node not in node_limit.split(","):
            return (
                False,
                'ERROR: Specified node "{}" is not in the allowed list of nodes for VM "{}".'.format(
                    target_node, domain
                ),
            )

        # Verify if node is current node
        if target_node == current_node:
            last_node = zkhandler.read(("domain.last_node", dom_uuid))
            if last_node:
                zkhandler.write([(("domain.last_node", dom_uuid), "")])
                return True, 'Making temporary migration permanent for VM "{}".'.format(
                    domain
                )

            return False, 'ERROR: VM "{}" is already running on node "{}".'.format(
                domain, current_node
            )

    if not target_node:
        return (
            False,
            'ERROR: Could not find a valid migration target for VM "{}".'.format(
                domain
            ),
        )

    retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node)

    lock = zkhandler.exclusivelock(("domain.state", dom_uuid))
    with lock:
        zkhandler.write(
            [
                (("domain.state", dom_uuid), target_state),
                (("domain.node", dom_uuid), target_node),
                (("domain.last_node", dom_uuid), ""),
            ]
        )

        # Wait for 1/2 second for migration to start
        time.sleep(0.5)

    # Update any SR-IOV NICs
    update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node)

    if wait:
        while zkhandler.read(("domain.state", dom_uuid)) == target_state:
            time.sleep(0.5)
        retmsg = 'Permanently migrated VM "{}" to node "{}"'.format(domain, target_node)

    return True, retmsg
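# Hypothetical usage sketch for move_vm(); "zkhandler" is assumed to be an
# already-connected ZKHandler instance, and "web1" / "hv2" are example VM and
# node names.
ok, message = move_vm(zkhandler, "web1", "hv2", wait=True, force_live=False)
if not ok:
    raise RuntimeError(message)
print(message)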
def define_vm(
    zkhandler,
    config_data,
    target_node,
    node_limit,
    node_selector,
    node_autostart,
    migration_method=None,
    profile=None,
    tags=[],
    initial_state="stop",
):
    # Parse the XML data
    try:
        parsed_xml = lxml.objectify.fromstring(config_data)
    except Exception:
        return False, "ERROR: Failed to parse XML data."
    dom_uuid = parsed_xml.uuid.text
    dom_name = parsed_xml.name.text

    # Ensure that the UUID and name are unique
    if searchClusterByUUID(zkhandler, dom_uuid) or searchClusterByName(
        zkhandler, dom_name
    ):
        return (
            False,
            'ERROR: Specified VM "{}" or UUID "{}" matches an existing VM on the cluster'.format(
                dom_name, dom_uuid
            ),
        )

    if not target_node:
        target_node = common.findTargetNode(zkhandler, dom_uuid)
    else:
        # Verify node is valid
        valid_node = common.verifyNode(zkhandler, target_node)
        if not valid_node:
            return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)

    # Validate the new RAM against the current active node
    node_total_memory = int(zkhandler.read(("node.memory.total", target_node)))
    if int(parsed_xml.memory.text) >= node_total_memory:
        return (
            False,
            'ERROR: VM configuration specifies more memory ({} MiB) than node "{}" has available ({} MiB).'.format(
                parsed_xml.memory.text, target_node, node_total_memory
            ),
        )

    # Validate the number of vCPUs against the current active node
    node_total_cpus = int(zkhandler.read(("node.data.static", target_node)).split()[0])
    if (node_total_cpus - 2) <= int(parsed_xml.vcpu.text):
        return (
            False,
            'ERROR: VM configuration specifies more vCPUs ({}) than node "{}" has available ({} minus 2).'.format(
                parsed_xml.vcpu.text, target_node, node_total_cpus
            ),
        )

    # If a SR-IOV network device is being added, set its used state
    dnetworks = common.getDomainNetworks(parsed_xml, {})
    for network in dnetworks:
        if network["type"] in ["direct", "hostdev"]:
            dom_node = zkhandler.read(("domain.node", dom_uuid))

            # Check if the network is already in use
            is_used = zkhandler.read(
                ("node.sriov.vf", dom_node, "sriov_vf.used", network["source"])
            )
            if is_used == "True":
                used_by_name = searchClusterByUUID(
                    zkhandler,
                    zkhandler.read(
                        (
                            "node.sriov.vf",
                            dom_node,
                            "sriov_vf.used_by",
                            network["source"],
                        )
                    ),
                )
                return (
                    False,
                    'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(
                        network["source"], used_by_name, dom_node
                    ),
                )

            # We must update the "used" section
            set_sriov_vf_vm(
                zkhandler,
                dom_uuid,
                dom_node,
                network["source"],
                network["mac"],
                network["type"],
            )

    # Obtain the RBD disk list using the common functions
    ddisks = common.getDomainDisks(parsed_xml, {})
    rbd_list = []
    for disk in ddisks:
        if disk["type"] == "rbd":
            rbd_list.append(disk["name"])

    # Join the limit
    if isinstance(node_limit, list) and node_limit:
        formatted_node_limit = ",".join(node_limit)
    else:
        formatted_node_limit = ""

    # Join the RBD list
    if isinstance(rbd_list, list) and rbd_list:
        formatted_rbd_list = ",".join(rbd_list)
    else:
        formatted_rbd_list = ""

    # Add the new domain to Zookeeper
    zkhandler.write(
        [
            (("domain", dom_uuid), dom_name),
            (("domain.xml", dom_uuid), config_data),
            (("domain.state", dom_uuid), initial_state),
            (("domain.profile", dom_uuid), profile),
            (("domain.stats", dom_uuid), ""),
            (("domain.node", dom_uuid), target_node),
            (("domain.last_node", dom_uuid), ""),
            (("domain.failed_reason", dom_uuid), ""),
            (("domain.storage.volumes", dom_uuid), formatted_rbd_list),
            (("domain.console.log", dom_uuid), ""),
            (("domain.console.vnc", dom_uuid), ""),
            (("domain.meta.autostart", dom_uuid), node_autostart),
            (("domain.meta.migrate_method", dom_uuid), migration_method),
            (("domain.meta.node_limit", dom_uuid), formatted_node_limit),
            (("domain.meta.node_selector", dom_uuid), node_selector),
            (("domain.meta.tags", dom_uuid), ""),
            (("domain.migrate.sync_lock", dom_uuid), ""),
        ]
    )

    for tag in tags:
        tag_name = tag["name"]
        zkhandler.write(
            [
                (("domain.meta.tags", dom_uuid, "tag.name", tag_name), tag["name"]),
                (("domain.meta.tags", dom_uuid, "tag.type", tag_name), tag["type"]),
                (
                    ("domain.meta.tags", dom_uuid, "tag.protected", tag_name),
                    tag["protected"],
                ),
            ]
        )

    return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(
        dom_name, dom_uuid
    )
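# Hypothetical usage sketch for define_vm(): read a libvirt domain XML document
# and register it on an explicit node. The file path, node names, selector,
# tag contents, and other argument values below are illustrative assumptions;
# "zkhandler" is assumed to be an already-connected ZKHandler instance.
with open("/tmp/new-vm.xml") as xml_file:
    config_data = xml_file.read()

ok, message = define_vm(
    zkhandler,
    config_data,
    target_node="hv1",
    node_limit=["hv1", "hv2"],
    node_selector="mem",
    node_autostart="False",
    migration_method="live",
    profile="default",
    tags=[{"name": "web", "type": "user", "protected": False}],
    initial_state="stop",
)
print(message)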