class LiveGatherOutputHandler(GatherOutputHandler):
    """Live line-gathered output event handler class (-bL).

    Standard output lines are queued by line number, one MsgTree per
    line not displayed yet. A queued line is displayed as soon as the
    number of nodes that sent it equals the number of nodes still tracked.
    """

    def __init__(self, display, nodes):
        """Initialize the handler for the given display and target nodes.

        `nodes` must not be None; it is converted with NodeSet().
        """
        assert nodes is not None, "cannot gather local command"
        GatherOutputHandler.__init__(self, display)
        self._nodes = NodeSet(nodes)
        # number of stdout lines read so far, per node
        self._nodecnt = dict.fromkeys(self._nodes, 0)
        # queue of MsgTree, one per line that has not been displayed yet
        self._mtreeq = []
        # number of MsgTree already removed from the head of the queue
        self._offload = 0

    def ev_read(self, worker, node, sname, msg):
        """Queue a new stdout line from node and display completed lines.

        Messages from any other stream are handed to GatherOutputHandler.
        """
        if sname != worker.SNAME_STDOUT:
            GatherOutputHandler.ev_read(self, worker, node, sname, msg)
            return
        # Read new line from node
        self._nodecnt[node] += 1
        # Position of this line in the queue. The per-node counter is
        # absolute while the queue only holds lines that have not been
        # offloaded, so the queue must be sized from the relative slot;
        # comparing its length with the absolute counter would append an
        # unused MsgTree on almost every read.
        slot = self._nodecnt[node] - self._offload - 1
        while len(self._mtreeq) <= slot:
            self._mtreeq.append(MsgTree())
        self._mtreeq[slot].add(node, msg)
        self._live_line(worker)

    def ev_hup(self, worker, node, rc):
        """Stop waiting for a node that has no line pending at the head."""
        if not self._mtreeq or node not in self._mtreeq[0]:
            # forget a node that doesn't answer to continue live line
            # gathering anyway (also when no line is currently pending,
            # otherwise later lines could never be seen as complete)
            self._nodes.remove(node)
            self._live_line(worker)

    def _print_mtree(self, mtree):
        """Display the content of mtree, gathered by identical buffer."""
        gathered = ((item[0], NodeSet.fromlist(item[1]))
                    for item in mtree.walk())
        for buf, nodeset in sorted(gathered, key=bufnodeset_cmpkey):
            self._display.print_gather(nodeset, buf)

    def _live_line(self, worker):
        """Display every line at the head of the queue that is complete."""
        # if all nodes have replied, display gathered line
        while self._mtreeq and len(self._mtreeq[0]) == len(self._nodes):
            mtree = self._mtreeq.pop(0)
            self._offload += 1
            self._runtimer_clean()
            self._print_mtree(mtree)
            self._runtimer_set_dirty()

    def ev_close(self, worker, timedout):
        """Display the lines still queued and finish the worker."""
        # Worker is closing -- it's time to gather results...
        self._runtimer_finalize(worker)
        for mtree in self._mtreeq:
            self._print_mtree(mtree)
        self._close_common(worker)
        # Notify main thread to update its prompt
        self.update_prompt(worker)
class Dfuse(DfuseCommand):
    """Class defining an object of type DfuseCommand."""

    def __init__(self, hosts, tmp):
        """Create a dfuse object.

        Args:
            hosts (list): hosts on which dfuse is run
            tmp (str): value stored as self.tmp
        """
        super().__init__("/run/dfuse/*", "dfuse")

        # set params
        self.hosts = hosts
        self.tmp = tmp
        # hosts on which the dfuse command returned 0 and was not stopped
        self.running_hosts = NodeSet()

    def __del__(self):
        """Destruct the object."""
        if self.running_hosts:
            self.log.error('Dfuse object deleted without shutting down')

    def check_mount_state(self, nodes=None):
        """Check the dfuse mount point mounted state on the hosts.

        Args:
            nodes (NodeSet, optional): hosts on which to check if dfuse is
                mounted. Defaults to None, which will use all of the hosts.

        Returns:
            dict: a dictionary of NodeSets of hosts with the dfuse mount point
                either "mounted", "unmounted" or "nodirectory" (no mount point
                directory and no dfuse entry in /proc/mounts)

        """
        state = {
            "mounted": NodeSet(),
            "unmounted": NodeSet(),
            "nodirectory": NodeSet()
        }
        if not nodes:
            nodes = NodeSet.fromlist(self.hosts)
        check_mounted = NodeSet()

        # Detect which hosts have mount point directories defined
        command = "test -d {0} -a ! -L {0}".format(self.mount_dir.value)
        retcodes = pcmd(nodes, command, expect_rc=None)
        for retcode, hosts in retcodes.items():
            for host in hosts:
                if retcode == 0:
                    check_mounted.add(host)
                    continue
                # The directory test failed: look for a dfuse entry for the
                # mount point in /proc/mounts before reporting the directory
                # as missing
                grep_command = "grep 'dfuse {}' /proc/mounts".format(
                    self.mount_dir.value)
                grep_retcodes = pcmd([host], grep_command, expect_rc=None)
                for ret_code, host_names in grep_retcodes.items():
                    for node in host_names:
                        if ret_code == 0:
                            check_mounted.add(node)
                        else:
                            state["nodirectory"].add(node)

        if check_mounted:
            # Detect which hosts with mount point directories have it mounted as
            # a fuseblk device
            command = "stat -c %T -f {0} | grep -v fuseblk".format(
                self.mount_dir.value)
            retcodes = pcmd(check_mounted, command, expect_rc=None)
            for retcode, hosts in retcodes.items():
                for host in hosts:
                    # rc=1 from "grep -v" means only fuseblk was reported
                    if retcode == 1:
                        state["mounted"].add(host)
                    else:
                        state["unmounted"].add(host)

        return state

    def get_umount_command(self, force=False):
        """Get the command to umount the dfuse mount point.

        Args:
            force (bool, optional): whether to force the umount with a lazy
                unmount. Defaults to False.

        Returns:
            str: the dfuse umount command

        """
        umount = "-uz" if force else "-u"
        command = [
            # Double quotes are required: inside single quotes the shell does
            # not perform the command substitution, so the -x test would
            # always fail and fusermount3 would always be selected.
            'if [ -x "$(command -v fusermount)" ]',
            "then fusermount {0} {1}".format(umount, self.mount_dir.value),
            "else fusermount3 {0} {1}".format(umount, self.mount_dir.value),
            "fi"
        ]
        return ";".join(command)

    def create_mount_point(self):
        """Create dfuse directory.

        Raises:
            CommandFailure: In case of error creating directory

        """
        # Raise exception if mount point not specified
        if self.mount_dir.value is None:
            raise CommandFailure("Mount point not specified, "
                                 "check test yaml file")

        # Create the mount point on any host without dfuse already mounted
        state = self.check_mount_state()
        if state["nodirectory"]:
            command = "mkdir -p {}".format(self.mount_dir.value)
            ret_code = pcmd(state["nodirectory"], command, timeout=30)
            if len(ret_code) > 1 or 0 not in ret_code:
                failed_nodes = [
                    str(node_set) for code, node_set in ret_code.items()
                    if code != 0
                ]
                error_hosts = NodeSet(",".join(failed_nodes))
                raise CommandFailure(
                    "Error creating the {} dfuse mount point on the "
                    "following hosts: {}".format(
                        self.mount_dir.value, error_hosts))

    def remove_mount_point(self, fail=True):
        """Remove dfuse directory.

        Try once with a simple rmdir which should succeed, if this does not then
        try again with rm -rf, but still raise an error.

        Args:
            fail (bool, optional): whether to raise an exception when the
                directory could not be removed with rmdir. Defaults to True.

        Raises:
            CommandFailure: In case of error deleting directory

        """
        # raise exception if mount point not specified
        if self.mount_dir.value is None:
            raise CommandFailure("Mount point not specified, "
                                 "check test yaml file")

        dir_exists, clean_nodes = check_file_exists(
            self.hosts, self.mount_dir.value, directory=True)
        if dir_exists:
            target_nodes = list(self.hosts)
            if clean_nodes:
                # NOTE(review): clean_nodes is presumably the collection of
                # hosts without the directory - confirm against
                # check_file_exists. list.remove() of such a collection
                # raises ValueError, so filter the host names instead.
                target_nodes = [
                    host for host in target_nodes if host not in clean_nodes]

            self.log.info(
                "Removing the %s dfuse mount point on %s",
                self.mount_dir.value, target_nodes)

            cmd = "rmdir {}".format(self.mount_dir.value)
            ret_code = pcmd(target_nodes, cmd, timeout=30)
            if len(ret_code) == 1 and 0 in ret_code:
                return

            failed_nodes = NodeSet(",".join([
                str(node_set) for code, node_set in ret_code.items()
                if code != 0
            ]))

            cmd = "rm -rf {}".format(self.mount_dir.value)
            ret_code = pcmd(failed_nodes, cmd, timeout=30)
            if len(ret_code) > 1 or 0 not in ret_code:
                error_hosts = NodeSet(",".join([
                    str(node_set) for code, node_set in ret_code.items()
                    if code != 0
                ]))
                if fail:
                    raise CommandFailure(
                        "Error removing the {} dfuse mount point with rm on "
                        "the following hosts: {}".format(
                            self.mount_dir.value, error_hosts))
            # Needing rm -rf is still reported as an error
            if fail:
                raise CommandFailure(
                    "Error removing the {} dfuse mount point with rmdir on the "
                    "following hosts: {}".format(
                        self.mount_dir.value, failed_nodes))
        else:
            self.log.info(
                "No %s dfuse mount point directory found on %s",
                self.mount_dir.value, self.hosts)

    def run(self, check=True):
        # pylint: disable=arguments-differ
        """Run the dfuse command.

        Args:
            check (bool): Check if dfuse mounted properly after
                mount is executed.

        Raises:
            CommandFailure: In case dfuse run command fails

        """
        self.log.info('Starting dfuse at %s', self.mount_dir.value)

        # A log file must be defined to ensure logs are captured
        if "D_LOG_FILE" not in self.env:
            raise CommandFailure(
                "Dfuse missing environment variables for D_LOG_FILE")

        # create dfuse dir if does not exist
        self.create_mount_point()

        # run dfuse command
        cmd = "".join([self.env.get_export_str(), str(self)])
        ret_code = pcmd(self.hosts, cmd, timeout=30)

        # Hosts returning 0 are running; any remaining entry is a failure
        if 0 in ret_code:
            self.running_hosts.add(ret_code[0])
            del ret_code[0]

        if ret_code:
            error_hosts = NodeSet(",".join([
                str(node_set) for code, node_set in ret_code.items()
                if code != 0
            ]))
            raise CommandFailure(
                "Error starting dfuse on the following hosts: {}".format(
                    error_hosts))

        if check:
            # Dfuse will block in the command for the mount to complete, even
            # if run in background mode so it should be possible to start using
            # it immediately after the command returns.
            if not self.check_running(fail_on_error=False):
                self.log.info('Waiting two seconds for dfuse to start')
                time.sleep(2)
                if not self.check_running(fail_on_error=False):
                    self.log.info('Waiting five seconds for dfuse to start')
                    time.sleep(5)
                    self.check_running()

    def check_running(self, fail_on_error=True):
        """Check dfuse is running.

        Run a command to verify dfuse is running on hosts where it is supposed
        to be.  Use grep -v and rc=1 here so that if it isn't, then we can
        see what is being used instead.

        Args:
            fail_on_error (bool, optional): should an exception be raised if an
                error is detected. Defaults to True.

        Raises:
            CommandFailure: raised if dfuse is found not running on any expected
                nodes and fail_on_error is set.

        Returns:
            bool: whether or not dfuse is running

        """
        status = True
        state = self.check_mount_state(self.running_hosts)
        if state["unmounted"] or state["nodirectory"]:
            self.log.error(
                "Error: dfuse not running on %s",
                str(state["unmounted"].union(state["nodirectory"])))
            status = False
            if fail_on_error:
                raise CommandFailure("dfuse not running")
        return status

    def stop(self):
        """Stop dfuse.

        Try to stop dfuse.  Try once nicely by using fusermount, then if that
        fails try to pkill it to see if that works.  Abort based on the result
        of the fusermount, as if pkill is necessary then dfuse itself has
        not worked correctly.

        Finally, try and remove the mount point, and that itself should work.

        Raises:
            CommandFailure: In case dfuse stop fails

        """
        # Include all hosts when stopping to ensure all mount points in any
        # state are properly removed
        self.running_hosts.add(NodeSet.fromlist(self.hosts))

        self.log.info(
            "Stopping dfuse at %s on %s",
            self.mount_dir.value, self.running_hosts)

        if self.mount_dir.value and self.running_hosts:
            error_list = []

            # Loop until all fuseblk mounted devices are unmounted
            counter = 0
            while self.running_hosts and counter < 3:
                # Attempt to kill dfuse on after first unmount fails
                if self.running_hosts and counter > 1:
                    kill_command = "pkill dfuse --signal KILL"
                    pcmd(self.running_hosts, kill_command, timeout=30)

                # Attempt to unmount any fuseblk mounted devices after detection
                if self.running_hosts and counter > 0:
                    pcmd(self.running_hosts,
                         self.get_umount_command(counter > 1),
                         expect_rc=None)
                    time.sleep(2)

                # Detect which hosts have fuseblk mounted devices and remove any
                # hosts which no longer have the dfuse mount point mounted
                state = self.check_mount_state(self.running_hosts)
                for host in state["unmounted"].union(state["nodirectory"]):
                    self.running_hosts.remove(host)

                # Increment the loop counter
                counter += 1

            if self.running_hosts:
                error_list.append(
                    "Error stopping dfuse on {}".format(self.running_hosts))

            # Remove mount points
            try:
                self.remove_mount_point()
            except CommandFailure as error:
                # Store the message text: str.join() below only accepts
                # strings
                error_list.append(str(error))

            # Report any errors
            if error_list:
                raise CommandFailure("\n".join(error_list))

        elif self.mount_dir.value is None:
            self.log.info("No dfuse mount directory defined - nothing to stop")

        else:
            self.log.info("No hosts running dfuse - nothing to stop")
class Dfuse(DfuseCommand):
    """Class defining an object of type DfuseCommand."""

    def __init__(self, hosts, tmp):
        """Create a dfuse object.

        Args:
            hosts (list): hosts on which dfuse is run
            tmp (str): value stored as self.tmp
        """
        super(Dfuse, self).__init__("/run/dfuse/*", "dfuse")

        # set params
        self.hosts = hosts
        self.tmp = tmp
        # hosts on which the dfuse command returned 0 and was not stopped
        self.running_hosts = NodeSet()

    def __del__(self):
        """Destruct the object."""
        if self.running_hosts:
            self.log.error('Dfuse object deleted without shutting down')

    def create_mount_point(self):
        """Create dfuse directory.

        Raises:
            CommandFailure: In case of error creating directory

        """
        # raise exception if mount point not specified
        if self.mount_dir.value is None:
            raise CommandFailure("Mount point not specified, "
                                 "check test yaml file")

        # Only create the directory on the hosts reported as missing it
        _, missing_nodes = check_file_exists(
            self.hosts, self.mount_dir.value, directory=True)
        if missing_nodes:
            cmd = "mkdir -p {}".format(self.mount_dir.value)
            ret_code = pcmd(missing_nodes, cmd, timeout=30)
            if len(ret_code) > 1 or 0 not in ret_code:
                error_hosts = NodeSet(",".join([
                    str(node_set) for code, node_set in ret_code.items()
                    if code != 0
                ]))
                raise CommandFailure(
                    "Error creating the {} dfuse mount point on the following "
                    "hosts: {}".format(self.mount_dir.value, error_hosts))

    def remove_mount_point(self, fail=True):
        """Remove dfuse directory.

        Try once with a simple rmdir which should succeed, if this does not then
        try again with rm -rf, but still raise an error.

        Args:
            fail (bool, optional): whether to raise an exception when the
                directory could not be removed with rmdir. Defaults to True.

        Raises:
            CommandFailure: In case of error deleting directory

        """
        # raise exception if mount point not specified
        if self.mount_dir.value is None:
            raise CommandFailure("Mount point not specified, "
                                 "check test yaml file")

        dir_exists, clean_nodes = check_file_exists(
            self.hosts, self.mount_dir.value, directory=True)
        if dir_exists:
            target_nodes = list(self.hosts)
            if clean_nodes:
                # clean_nodes holds the hosts reported without the directory
                # (create_mount_point passes the same return value to pcmd as
                # a host collection). list.remove() of a collection raises
                # ValueError, so filter the host names instead.
                target_nodes = [
                    host for host in target_nodes if host not in clean_nodes]

            cmd = "rmdir {}".format(self.mount_dir.value)
            ret_code = pcmd(target_nodes, cmd, timeout=30)
            if len(ret_code) == 1 and 0 in ret_code:
                return

            failed_nodes = NodeSet(",".join([
                str(node_set) for code, node_set in ret_code.items()
                if code != 0
            ]))

            cmd = "rm -rf {}".format(self.mount_dir.value)
            ret_code = pcmd(failed_nodes, cmd, timeout=30)
            if len(ret_code) > 1 or 0 not in ret_code:
                error_hosts = NodeSet(",".join([
                    str(node_set) for code, node_set in ret_code.items()
                    if code != 0
                ]))
                if fail:
                    raise CommandFailure(
                        "Error removing the {} dfuse mount point with rm on "
                        "the following hosts: {}".format(
                            self.mount_dir.value, error_hosts))
            # Needing rm -rf is still reported as an error
            if fail:
                raise CommandFailure(
                    "Error removing the {} dfuse mount point with rmdir on the "
                    "following hosts: {}".format(
                        self.mount_dir.value, failed_nodes))

    def run(self, check=True):
        """Run the dfuse command.

        Args:
            check (bool): Check if dfuse mounted properly after
                mount is executed.

        Raises:
            CommandFailure: In case dfuse run command fails

        """
        self.log.info('Starting dfuse at %s', self.mount_dir.value)

        # A log file must be defined to ensure logs are captured
        if "D_LOG_FILE" not in self.env:
            raise CommandFailure(
                "Dfuse missing environment variables for D_LOG_FILE")

        # create dfuse dir if does not exist
        self.create_mount_point()

        # run dfuse command
        cmd = "".join([self.env.get_export_str(), str(self)])
        ret_code = pcmd(self.hosts, cmd, timeout=30)

        # Hosts returning 0 are running; any remaining entry is a failure
        if 0 in ret_code:
            self.running_hosts.add(ret_code[0])
            del ret_code[0]

        if ret_code:
            error_hosts = NodeSet(",".join([
                str(node_set) for code, node_set in ret_code.items()
                if code != 0
            ]))
            raise CommandFailure(
                "Error starting dfuse on the following hosts: {}".format(
                    error_hosts))

        if check:
            # Dfuse will block in the command for the mount to complete, even
            # if run in background mode so it should be possible to start using
            # it immediately after the command returns.
            if not self.check_running(fail_on_error=False):
                self.log.info('Waiting two seconds for dfuse to start')
                time.sleep(2)
                if not self.check_running(fail_on_error=False):
                    self.log.info('Waiting five seconds for dfuse to start')
                    time.sleep(5)
                    self.check_running()

    def check_running(self, fail_on_error=True):
        """Check dfuse is running.

        Run a command to verify dfuse is running on hosts where it is supposed
        to be.  Use grep -v and rc=1 here so that if it isn't, then we can
        see what is being used instead.

        Args:
            fail_on_error (bool, optional): should an exception be raised if an
                error is detected. Defaults to True.

        Raises:
            CommandFailure: raised if any host returned something other than 1
                and fail_on_error is set.

        Returns:
            bool: True if every host returned 1, False otherwise

        """
        retcodes = pcmd(
            self.running_hosts,
            "stat -c %T -f {0} | grep -v fuseblk".format(
                self.mount_dir.value),
            expect_rc=1)
        # rc=1 is the expected result; anything left over is an error
        if 1 in retcodes:
            del retcodes[1]
        if retcodes:
            self.log.error('Errors checking running: %s', retcodes)
            if not fail_on_error:
                return False
            raise CommandFailure('dfuse not running')
        return True

    def stop(self):
        """Stop dfuse.

        Try to stop dfuse.  Try once nicely by using fusermount, then if that
        fails try to pkill it to see if that works.  Abort based on the result
        of the fusermount, as if pkill is necessary then dfuse itself has
        not worked correctly.

        Finally, try and remove the mount point, and that itself should work.

        Raises:
            CommandFailure: In case dfuse stop fails

        """
        self.log.info('Stopping dfuse at %s on %s',
                      self.mount_dir.value,
                      self.running_hosts)

        if self.mount_dir.value is None:
            return

        if not self.running_hosts:
            return

        self.check_running()
        umount_cmd = "; ".join([
            # Double quotes are required: inside single quotes the shell does
            # not perform the command substitution, so the -x test would
            # always fail and fusermount3 would always be selected.
            'if [ -x "$(command -v fusermount)" ]',
            "then fusermount -u {0}".format(self.mount_dir.value),
            "else fusermount3 -u {0}".format(self.mount_dir.value),
            "fi"
        ])
        ret_code = pcmd(self.running_hosts, umount_cmd, timeout=30)

        if 0 in ret_code:
            self.running_hosts.remove(ret_code[0])
            del ret_code[0]

        if self.running_hosts:
            cmd = "pkill dfuse --signal KILL"
            pcmd(self.running_hosts, cmd, timeout=30)
            # Retry with the same joined command string as the first attempt
            pcmd(self.running_hosts, umount_cmd, timeout=30)
            self.remove_mount_point(fail=False)
            raise CommandFailure(
                "Error stopping dfuse on the following hosts: {}".format(
                    self.running_hosts))
        time.sleep(2)
        self.remove_mount_point()
class NfsGoldenImage(Image): NFS_DIRECTORY = '/diskless/images/nfsimages/golden/' # Class constructor def __init__(self, name, staging_image=None): super().__init__(name, staging_image) # Create new golden image def create_new_image(self, staging_image): super().create_new_image() # Set image attributes before creation self.kernel = staging_image.kernel self.image = 'initramfs-kernel-' + self.kernel.replace('vmlinuz-', '') self.password = staging_image.password self.release_version = staging_image.release_version self.nodes = NodeSet() self.NFS_DIRECTORY = NfsGoldenImage.NFS_DIRECTORY + self.name + '/' # Generate image files self.generate_files(staging_image) def get_existing_image(self): super().get_existing_image() # Convert string node set into NodeSet object self.nodes = NodeSet(self.nodes) # Generate golden image files def generate_files(self, staging_image): super().generate_files() self.create_image_folders() self.generate_file_system(staging_image) super().generate_ipxe_boot_file() def create_image_folders(self): super().create_image_folders() logging.debug('Executing \'mkdir -p ' + self.NFS_DIRECTORY + 'nodes\'') os.makedirs(self.NFS_DIRECTORY + 'nodes') def generate_file_system(self, staging_image): super().generate_file_system() logging.info('Cloning staging image to golden') logging.debug('Executing \'cp -a ' + staging_image.NFS_DIRECTORY + ' ' + self.NFS_DIRECTORY + 'image/\'') os.system('cp -a ' + staging_image.NFS_DIRECTORY + ' ' + self.NFS_DIRECTORY + 'image/') # List nodes associated with nfs golden image def get_nodes(self): return self.nodes # Add nodes to the image def add_nodes(self, nodes_range): logging.info('Cloning nodes, this may take some time...') # For each specified node for node in NodeSet(nodes_range): # If the node is not already in the nodeset if str(node) not in self.nodes: logging.info("Working on node: " + str(node)) # Copy golden base image for the specified nodes logging.debug('Executing \'cp -a ' + self.NFS_DIRECTORY + 
'image/ ' + self.NFS_DIRECTORY + 'nodes/' + node + '\'') os.system('cp -a ' + self.NFS_DIRECTORY + 'image/ ' + self.NFS_DIRECTORY + 'nodes/' + node) # Updatde node list self.nodes.add(nodes_range) # Register image with new values self.register_image() # Remove nodes from the golden image def remove_nodes(self, nodes_range): try: self.nodes.remove(nodes_range) logging.info('Deleting nodes, this may take some time...') # For each node for node in NodeSet(nodes_range): logging.info('Working on node: ' + str(node)) # Remove node directory logging.debug('Executing \'rm -rf ' + self.NFS_DIRECTORY + '/nodes/' + node + '\'') shutil.rmtree(self.NFS_DIRECTORY + '/nodes/' + node) # Register image with new values self.register_image() except KeyError: raise KeyError("NodeSet to remove is not in image NodeSet !") # Remove files associated with the NFS image def remove_files(self): super().remove_files() logging.debug('Executing \'rm -rf ' + self.NFS_DIRECTORY + '\'') shutil.rmtree(self.NFS_DIRECTORY) # Clean all image files without image object when an image is corrupted @staticmethod def clean(image_name): Image.clean(image_name) if os.path.isdir(Image.IMAGES_DIRECTORY + image_name): logging.debug(Image.IMAGES_DIRECTORY + image_name + ' is a directory') logging.debug('Executing \'rm -rf ' + Image.IMAGES_DIRECTORY + image_name + '\'') shutil.rmtree(Image.IMAGES_DIRECTORY + image_name) if os.path.isdir(NfsGoldenImage.NFS_DIRECTORY + image_name): logging.debug(NfsGoldenImage.NFS_DIRECTORY + image_name + ' is a directory') logging.debug('Executing \'rm -rf ' + NfsGoldenImage.NFS_DIRECTORY + image_name + '\'') shutil.rmtree(NfsGoldenImage.NFS_DIRECTORY + image_name) @staticmethod def get_boot_file_template(): """Get the class boot file template. This method must be redefined in all Image subclasses.""" return '''#!ipxe