def get_pool_list(self, quantity, scm_ratio, nvme_ratio, svcn=None):
    """Get a list of TestPool objects.

    Set each TestPool's scm_size and nvme_size attributes using the
    specified ratios and the largest SCM or NVMe size common to all the
    configured servers.

    The sizes are the same for every pool, so they are validated and
    computed once, before any pool object is requested.

    Args:
        quantity (int): number of TestPool objects to create
        scm_ratio (float): percentage of the maximum SCM capacity to use
            for the pool sizes, e.g. 0.9 for 90%
        nvme_ratio (float): percentage of the maximum NVMe capacity to use
            for the pool sizes, e.g. 0.9 for 90%. Specifying None will
            setup each pool without NVMe.
        svcn (int): Number of pool service replicas. The default value
            of None will use the default set on the server.

    Returns:
        list: a list of TestPool objects equal in length to the quantity
            specified, each configured with the same SCM and NVMe sizes.

    """
    sizes = self.get_max_pool_sizes(
        scm_ratio, 1 if nvme_ratio is None else nvme_ratio)

    nvme_size = None
    if nvme_ratio is not None:
        # An unconfigured NVMe tier may be reported as either None or 0;
        # neither can be turned into a usable pool NVMe size.
        if not sizes[1]:
            self.fail(
                "Unable to assign a max pool NVMe size; NVMe not "
                "configured!")

        # The I/O server allocates NVMe storage on targets in multiples
        # of 1GiB per target.  A server with 8 targets will have a
        # minimum NVMe size of 8 GiB.  Specify the largest NVMe size in
        # GiB that can be used with the configured number of targets and
        # specified capacity in GiB.
        targets = self.server_managers[0].get_config_value("targets")
        increment = human_to_bytes("{}GiB".format(targets))
        # Never go below a single increment, even when the available
        # capacity is smaller than one increment.
        nvme_multiple = max(1, int(sizes[1] // increment)) * increment
        self.log.info(
            "Largest NVMe multiple based on %s targets in %s: %s (%s)",
            targets, str(sizes[1]), str(nvme_multiple),
            bytes_to_human(nvme_multiple))
        nvme_size = bytes_to_human(nvme_multiple)

    scm_size = bytes_to_human(sizes[0])

    pool_list = [
        self.get_pool(create=False, connect=False)
        for _ in range(quantity)
    ]
    for pool in pool_list:
        pool.svcn.update(svcn)
        pool.scm_size.update(scm_size, "scm_size")
        if nvme_size is not None:
            pool.nvme_size.update(nvme_size, "nvme_size")

    return pool_list
def get_max_pool_sizes(self, scm_ratio=0.9, nvme_ratio=0.9):
    """Determine the largest pool sizes for the current server setup.

    The available storage reported by the first server manager is scaled
    entry by entry: a non-zero size whose ratio is less than 1 is
    multiplied by that ratio, any other size is left untouched.

    Args:
        scm_ratio (float, optional): fraction of the maximum SCM capacity
            to use for the pool sizes. Defaults to 0.9 (90%).
        nvme_ratio (float, optional): fraction of the maximum NVMe
            capacity to use for the pool sizes. Defaults to 0.9 (90%).

    Returns:
        list: the maximum pool creation SCM size followed by the NVMe
            size

    """
    try:
        sizes = self.server_managers[0].get_available_storage()
    except ServerFailed as error:
        self.fail(error)

    requested = (scm_ratio, nvme_ratio)
    for position, current in enumerate(sizes):
        # Nothing to scale for an empty tier or a ratio of 100% or more
        if not current or not requested[position] < 1:
            continue

        # Reduce the size by the specified percentage
        sizes[position] *= requested[position]
        self.log.info(
            "Adjusted %s size by %.2f%%: %s (%s)",
            "NVMe" if position else "SCM",
            100 * requested[position],
            str(sizes[position]),
            bytes_to_human(sizes[position]))

    return sizes
def get_available_storage(self):
    """Report the SCM and NVMe storage usable on every server host.

    When DCPM or NVMe is in use the servers are stopped, the storage of
    all hosts is scanned through dmg, and the servers are started again.
    The value reported for each tier is the smallest per-host total of
    the configured devices.  Without DCPM the configured "scm_size"
    value (interpreted as GB) is used for SCM.

    Raises:
        ServerFailed: if the dmg storage scan finished with a non-zero
            exit status

    Returns:
        list: the maximum available SCM size followed by the maximum
            available NVMe size, in bytes

    """
    def _capacity_per_host(key, device_names):
        """Sum the capacity of the named devices for each scanned host.

        Args:
            key (str): the capacity type, e.g. "scm" or "nvme"
            device_names (list): the device names of this capacity type

        Returns:
            dict: the total capacity of the matching devices, keyed by
                host

        """
        return {
            host: sum(
                human_to_bytes(data[host][key][device]["capacity"])
                for device in data[host][key]
                if device in device_names)
            for host in data
        }

    # Default maximum bytes for SCM and NVMe
    scm_bytes = 0
    nvme_bytes = 0

    dcpm_enabled = self.manager.job.using_dcpm
    nvme_enabled = self.manager.job.using_nvme

    if dcpm_enabled or nvme_enabled:
        # Stop the DAOS IO servers in order to be able to scan the storage
        self.system_stop()

        # Scan all of the hosts for their SCM and NVMe storage, then point
        # dmg back at the access points
        self.dmg.hostlist = self._hosts
        data = self.dmg.storage_scan(verbose=True)
        self.dmg.hostlist = self.get_config_value("access_points")
        if self.dmg.result.exit_status != 0:
            raise ServerFailed(
                "Error obtaining DAOS storage:\n{}".format(self.dmg.result))

        # Restart the DAOS IO servers
        self.system_start()

    if not dcpm_enabled:
        # Use the assigned scm_size
        scm_bytes = human_to_bytes(
            "{}GB".format(self.get_config_value("scm_size")))
    else:
        # Find the sizes of the configured SCM storage
        scm_devices = []
        for path in self.get_config_value("scm_list"):
            if path:
                scm_devices.append(os.path.basename(path))
        capacity = _capacity_per_host("scm", scm_devices)
        for host in sorted(capacity):
            self.log.info("SCM capacity for %s: %s", host, capacity[host])

        # Use the minimum SCM storage across all servers
        scm_bytes = min(capacity.values())

    if nvme_enabled:
        # Find the sizes of the configured NVMe storage
        capacity = _capacity_per_host(
            "nvme", self.get_config_value("bdev_list"))
        for host in sorted(capacity):
            self.log.info("NVMe capacity for %s: %s", host, capacity[host])

        # Use the minimum NVMe storage across all servers
        nvme_bytes = min(capacity.values())

    storage = [scm_bytes, nvme_bytes]
    self.log.info(
        "Total available storage:\n SCM: %s (%s)\n NVMe: %s (%s)",
        str(storage[0]), bytes_to_human(storage[0], binary=False),
        str(storage[1]), bytes_to_human(storage[1], binary=False))
    return storage
def autosize_pool_params(self, size, tier_ratio, scm_size, nvme_size,
                         min_targets=1, quantity=1):
    """Update any pool size parameter ending in a %.

    Use the current NVMe and SCM storage sizes to assign values to the
    size, scm_size, and or nvme_size dmg pool create arguments which end
    in "%".  The numerical part of these arguments will be used to assign
    a value that is X% of the available storage capacity, divided by the
    number of expected pools (quantity).

    Each autosized value is rounded down to a whole number of increments
    (1GiB for size and nvme_size, 16MiB for scm_size).  If the number of
    increments is smaller than the number of targets configured per
    engine, the number of targets is reduced to match and the servers are
    restarted; if this would drop below min_targets the autosizing is
    cancelled.

    Note: depending upon the inputs this method may return dmg pool
    create parameter combinations that are not supported, e.g. tier_ratio
    + nvme_size.  This is intended to allow testing of these
    combinations.

    Args:
        size (object): the str, int, or None value for the dmg pool
            create size parameter.
        tier_ratio (object): the int or None value for the dmg pool
            create tier_ratio parameter. A value of 6 is used in the
            calculations when None is specified.
        scm_size (object): the str, int, or None value for the dmg pool
            create scm_size parameter.
        nvme_size (object): the str, int, or None value for the dmg pool
            create nvme_size parameter.
        min_targets (int, optional): the minimum number of targets per
            engine that can be configured. Defaults to 1.
        quantity (int, optional): Number of pools to account for in the
            size calculations. The pool size returned is only for a
            single pool. Defaults to 1.

    Raises:
        ServerFailed: if there was an error obtaining auto-sized TestPool
            parameters, e.g. too few targets are configured, the
            available storage could not be obtained, or the value in
            front of the "%" is not an integer.
        AutosizeCancel: if a valid pool parameter size could not be
            obtained

    Returns:
        dict: the parameters for a TestPool object: the tier_ratio as it
            was passed in plus an entry for each autosized parameter.

    """
    # Adjust any pool size parameter by the requested percentage
    params = {"tier_ratio": tier_ratio}
    adjusted = {"size": size, "scm_size": scm_size, "nvme_size": nvme_size}
    keys = [
        key for key in ("size", "scm_size", "nvme_size")
        if adjusted[key] is not None and str(adjusted[key]).endswith("%")]
    if keys:
        # Verify the minimum number of targets configured per engine
        targets = min(self.manager.job.get_engine_values("targets"))
        if targets < min_targets:
            raise ServerFailed(
                "Minimum target quantity ({}) exceeds current target "
                "quantity ({})".format(min_targets, targets))

        self.log.info("-" * 100)
        pool_msg = "{} pool{}".format(quantity, "s" if quantity > 1 else "")
        self.log.info(
            "Autosizing TestPool parameters ending with a \"%%\" for %s:",
            pool_msg)
        for key in ("size", "scm_size", "nvme_size"):
            self.log.info(
                " - %-9s : %s (%s)", key, adjusted[key], key in keys)

        # Determine the largest SCM and NVMe pool sizes can be used with
        # this server configuration with an optionally applied ratio.
        try:
            available_storage = self.get_available_storage()
        except ServerFailed as error:
            raise ServerFailed(
                "Error obtaining available storage") from error

        # Determine the SCM and NVMe size limits for the size and
        # tier_ratio arguments for the total number of engines
        if tier_ratio is None:
            # Use the default value if not provided
            tier_ratio = 6
        engine_qty = len(self.manager.job.engine_params) * len(self._hosts)
        available_storage["size"] = min(
            engine_qty * available_storage["nvme"],
            (engine_qty * available_storage["scm"]) / float(tier_ratio / 100)
        )
        available_storage["tier_ratio"] = \
            available_storage["size"] * float(tier_ratio / 100)
        self.log.info(
            "Largest storage size available for %s engines with a %.2f%% "
            "tier_ratio:", engine_qty, tier_ratio)
        self.log.info(
            " - NVME : %s",
            get_display_size(available_storage["size"]))
        self.log.info(
            " - SCM : %s",
            get_display_size(available_storage["tier_ratio"]))
        self.log.info(
            " - COMBINED : %s",
            get_display_size(
                available_storage["size"] + available_storage["tier_ratio"]))

        # Apply any requested percentages to the pool parameters
        available = {
            "size": {"size": available_storage["size"], "type": "NVMe"},
            "scm_size": {"size": available_storage["scm"], "type": "SCM"},
            "nvme_size": {"size": available_storage["nvme"], "type": "NVMe"}
        }
        self.log.info("Adjusted pool sizes for %s:", pool_msg)
        for key in keys:
            try:
                ratio = int(str(adjusted[key]).replace("%", ""))
            except ValueError as error:
                # int() reports malformed text with a ValueError; convert
                # it into the exception type callers of this method expect
                raise ServerFailed(
                    "Invalid '{}' format: {}".format(
                        key, adjusted[key])) from error
            adjusted[key] = \
                (available[key]["size"] * float(ratio / 100)) / quantity
            self.log.info(
                " - %-9s : %-4s storage adjusted by %.2f%%: %s",
                key, available[key]["type"], ratio,
                get_display_size(adjusted[key]))

        # Display the pool size increment value for each size argument
        increment = {
            "size": human_to_bytes("1GiB"),
            "scm_size": human_to_bytes("16MiB"),
            "nvme_size": human_to_bytes("1GiB")}
        self.log.info("Increment sizes per target:")
        for key in keys:
            self.log.info(
                " - %-9s : %s", key, get_display_size(increment[key]))

        # Adjust the size to use a SCM/NVMe target multiplier
        self.log.info("Pool sizes adjusted to fit by increment sizes:")
        adjusted_targets = targets
        for key in keys:
            multiplier = math.floor(adjusted[key] / increment[key])
            params[key] = multiplier * increment[key]
            self.log.info(
                " - %-9s : %s * %s = %s",
                key, multiplier, increment[key],
                get_display_size(params[key]))
            if multiplier < adjusted_targets:
                # Fewer increments than targets: shrink the target count
                adjusted_targets = multiplier
                if adjusted_targets < min_targets:
                    raise AutosizeCancel(
                        "Unable to autosize the {} pool parameter due to "
                        "exceeding the minimum of {} targets: {}".format(
                            key, min_targets, adjusted_targets))
            if key == "size":
                # The size argument covers NVMe plus the SCM tier share
                tier_ratio_size = params[key] * float(tier_ratio / 100)
                self.log.info(
                    " - %-9s : %.2f%% tier_ratio = %s",
                    key, tier_ratio, get_display_size(tier_ratio_size))
                params[key] += tier_ratio_size
                self.log.info(
                    " - %-9s : NVMe + SCM = %s",
                    key, get_display_size(params[key]))
            params[key] = bytes_to_human(params[key], binary=True)

        # Reboot the servers if a reduced number of targets is required
        if adjusted_targets < targets:
            self.log.info(
                "Updating targets per server engine: %s -> %s",
                targets, adjusted_targets)
            self.set_config_value("targets", adjusted_targets)
            self.stop()
            self.start()

        self.log.info("-" * 100)

    return params